1 /*! 2 Crate `walkdir` provides an efficient and cross platform implementation 3 of recursive directory traversal. Several options are exposed to control 4 iteration, such as whether to follow symbolic links (default off), limit the 5 maximum number of simultaneous open file descriptors and the ability to 6 efficiently skip descending into directories. 7 8 To use this crate, add `walkdir` as a dependency to your project's 9 `Cargo.toml`: 10 11 ```toml 12 [dependencies] 13 walkdir = "2" 14 ``` 15 16 # From the top 17 18 The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values 19 yielded by the iterator. Finally, the [`Error`] type is a small wrapper around 20 [`std::io::Error`] with additional information, such as if a loop was detected 21 while following symbolic links (not enabled by default). 22 23 [`WalkDir`]: struct.WalkDir.html 24 [`DirEntry`]: struct.DirEntry.html 25 [`Error`]: struct.Error.html 26 [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html 27 28 # Example 29 30 The following code recursively iterates over the directory given and prints 31 the path for each entry: 32 33 ```no_run 34 use walkdir::WalkDir; 35 # use walkdir::Error; 36 37 # fn try_main() -> Result<(), Error> { 38 for entry in WalkDir::new("foo") { 39 println!("{}", entry?.path().display()); 40 } 41 # Ok(()) 42 # } 43 ``` 44 45 Or, if you'd like to iterate over all entries and ignore any errors that 46 may arise, use [`filter_map`]. (e.g., This code below will silently skip 47 directories that the owner of the running process does not have permission to 48 access.) 49 50 ```no_run 51 use walkdir::WalkDir; 52 53 for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) { 54 println!("{}", entry.path().display()); 55 } 56 ``` 57 58 [`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map 59 60 # Example: follow symbolic links 61 62 The same code as above, except [`follow_links`] is enabled: 63 64 ```no_run 65 use walkdir::WalkDir; 66 # use walkdir::Error; 67 68 # fn try_main() -> Result<(), Error> { 69 for entry in WalkDir::new("foo").follow_links(true) { 70 println!("{}", entry?.path().display()); 71 } 72 # Ok(()) 73 # } 74 ``` 75 76 [`follow_links`]: struct.WalkDir.html#method.follow_links 77 78 # Example: skip hidden files and directories on unix 79 80 This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files 81 and directories efficiently (i.e. without recursing into hidden directories): 82 83 ```no_run 84 use walkdir::{DirEntry, WalkDir}; 85 # use walkdir::Error; 86 87 fn is_hidden(entry: &DirEntry) -> bool { 88 entry.file_name() 89 .to_str() 90 .map(|s| s.starts_with(".")) 91 .unwrap_or(false) 92 } 93 94 # fn try_main() -> Result<(), Error> { 95 let walker = WalkDir::new("foo").into_iter(); 96 for entry in walker.filter_entry(|e| !is_hidden(e)) { 97 println!("{}", entry?.path().display()); 98 } 99 # Ok(()) 100 # } 101 ``` 102 103 [`filter_entry`]: struct.IntoIter.html#method.filter_entry 104 */ 105 106 #![deny(missing_docs)] 107 #![allow(unknown_lints)] 108 109 #[cfg(doctest)] 110 doc_comment::doctest!("../README.md"); 111 112 use std::cmp::{min, Ordering}; 113 use std::fmt; 114 use std::fs::{self, ReadDir}; 115 use std::io; 116 use std::iter; 117 use std::path::{Path, PathBuf}; 118 use std::result; 119 use std::vec; 120 121 use same_file::Handle; 122 123 pub use crate::dent::DirEntry; 124 #[cfg(unix)] 125 pub use crate::dent::DirEntryExt; 126 pub use crate::error::Error; 127 128 mod dent; 129 mod error; 130 #[cfg(test)] 131 mod tests; 132 mod util; 133 134 /// Like try, but for iterators that return [`Option<Result<_, _>>`]. 135 /// 136 /// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html 137 macro_rules! itry { 138 ($e:expr) => { 139 match $e { 140 Ok(v) => v, 141 Err(err) => return Some(Err(From::from(err))), 142 } 143 }; 144 } 145 146 /// A result type for walkdir operations. 147 /// 148 /// Note that this result type embeds the error type in this crate. This 149 /// is only useful if you care about the additional information provided by 150 /// the error (such as the path associated with the error or whether a loop 151 /// was dectected). If you want things to Just Work, then you can use 152 /// [`io::Result`] instead since the error type in this package will 153 /// automatically convert to an [`io::Result`] when using the [`try!`] macro. 154 /// 155 /// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html 156 /// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html 157 pub type Result<T> = ::std::result::Result<T, Error>; 158 159 /// A builder to create an iterator for recursively walking a directory. 160 /// 161 /// Results are returned in depth first fashion, with directories yielded 162 /// before their contents. If [`contents_first`] is true, contents are yielded 163 /// before their directories. The order is unspecified but if [`sort_by`] is 164 /// given, directory entries are sorted according to this function. Directory 165 /// entries `.` and `..` are always omitted. 166 /// 167 /// If an error occurs at any point during iteration, then it is returned in 168 /// place of its corresponding directory entry and iteration continues as 169 /// normal. If an error occurs while opening a directory for reading, then it 170 /// is not descended into (but the error is still yielded by the iterator). 171 /// Iteration may be stopped at any time. When the iterator is destroyed, all 172 /// resources associated with it are freed. 173 /// 174 /// [`contents_first`]: struct.WalkDir.html#method.contents_first 175 /// [`sort_by`]: struct.WalkDir.html#method.sort_by 176 /// 177 /// # Usage 178 /// 179 /// This type implements [`IntoIterator`] so that it may be used as the subject 180 /// of a `for` loop. You may need to call [`into_iter`] explicitly if you want 181 /// to use iterator adapters such as [`filter_entry`]. 182 /// 183 /// Idiomatic use of this type should use method chaining to set desired 184 /// options. For example, this only shows entries with a depth of `1`, `2` or 185 /// `3` (relative to `foo`): 186 /// 187 /// ```no_run 188 /// use walkdir::WalkDir; 189 /// # use walkdir::Error; 190 /// 191 /// # fn try_main() -> Result<(), Error> { 192 /// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) { 193 /// println!("{}", entry?.path().display()); 194 /// } 195 /// # Ok(()) 196 /// # } 197 /// ``` 198 /// 199 /// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html 200 /// [`into_iter`]: https://doc.rust-lang.org/nightly/core/iter/trait.IntoIterator.html#tymethod.into_iter 201 /// [`filter_entry`]: struct.IntoIter.html#method.filter_entry 202 /// 203 /// Note that the iterator by default includes the top-most directory. Since 204 /// this is the only directory yielded with depth `0`, it is easy to ignore it 205 /// with the [`min_depth`] setting: 206 /// 207 /// ```no_run 208 /// use walkdir::WalkDir; 209 /// # use walkdir::Error; 210 /// 211 /// # fn try_main() -> Result<(), Error> { 212 /// for entry in WalkDir::new("foo").min_depth(1) { 213 /// println!("{}", entry?.path().display()); 214 /// } 215 /// # Ok(()) 216 /// # } 217 /// ``` 218 /// 219 /// [`min_depth`]: struct.WalkDir.html#method.min_depth 220 /// 221 /// This will only return descendents of the `foo` directory and not `foo` 222 /// itself. 223 /// 224 /// # Loops 225 /// 226 /// This iterator (like most/all recursive directory iterators) assumes that 227 /// no loops can be made with *hard* links on your file system. In particular, 228 /// this would require creating a hard link to a directory such that it creates 229 /// a loop. On most platforms, this operation is illegal. 230 /// 231 /// Note that when following symbolic/soft links, loops are detected and an 232 /// error is reported. 233 #[derive(Debug)] 234 pub struct WalkDir { 235 opts: WalkDirOptions, 236 root: PathBuf, 237 } 238 239 struct WalkDirOptions { 240 follow_links: bool, 241 follow_root_links: bool, 242 max_open: usize, 243 min_depth: usize, 244 max_depth: usize, 245 sorter: Option< 246 Box< 247 dyn FnMut(&DirEntry, &DirEntry) -> Ordering 248 + Send 249 + Sync 250 + 'static, 251 >, 252 >, 253 contents_first: bool, 254 same_file_system: bool, 255 } 256 257 impl fmt::Debug for WalkDirOptions { fmt( &self, f: &mut fmt::Formatter<'_>, ) -> result::Result<(), fmt::Error>258 fn fmt( 259 &self, 260 f: &mut fmt::Formatter<'_>, 261 ) -> result::Result<(), fmt::Error> { 262 let sorter_str = if self.sorter.is_some() { 263 // FnMut isn't `Debug` 264 "Some(...)" 265 } else { 266 "None" 267 }; 268 f.debug_struct("WalkDirOptions") 269 .field("follow_links", &self.follow_links) 270 .field("follow_root_link", &self.follow_root_links) 271 .field("max_open", &self.max_open) 272 .field("min_depth", &self.min_depth) 273 .field("max_depth", &self.max_depth) 274 .field("sorter", &sorter_str) 275 .field("contents_first", &self.contents_first) 276 .field("same_file_system", &self.same_file_system) 277 .finish() 278 } 279 } 280 281 impl WalkDir { 282 /// Create a builder for a recursive directory iterator starting at the 283 /// file path `root`. If `root` is a directory, then it is the first item 284 /// yielded by the iterator. If `root` is a file, then it is the first 285 /// and only item yielded by the iterator. If `root` is a symlink, then it 286 /// is always followed for the purposes of directory traversal. (A root 287 /// `DirEntry` still obeys its documentation with respect to symlinks and 288 /// the `follow_links` setting.) new<P: AsRef<Path>>(root: P) -> Self289 pub fn new<P: AsRef<Path>>(root: P) -> Self { 290 WalkDir { 291 opts: WalkDirOptions { 292 follow_links: false, 293 follow_root_links: true, 294 max_open: 10, 295 min_depth: 0, 296 max_depth: ::std::usize::MAX, 297 sorter: None, 298 contents_first: false, 299 same_file_system: false, 300 }, 301 root: root.as_ref().to_path_buf(), 302 } 303 } 304 305 /// Set the minimum depth of entries yielded by the iterator. 306 /// 307 /// The smallest depth is `0` and always corresponds to the path given 308 /// to the `new` function on this type. Its direct descendents have depth 309 /// `1`, and their descendents have depth `2`, and so on. min_depth(mut self, depth: usize) -> Self310 pub fn min_depth(mut self, depth: usize) -> Self { 311 self.opts.min_depth = depth; 312 if self.opts.min_depth > self.opts.max_depth { 313 self.opts.min_depth = self.opts.max_depth; 314 } 315 self 316 } 317 318 /// Set the maximum depth of entries yield by the iterator. 319 /// 320 /// The smallest depth is `0` and always corresponds to the path given 321 /// to the `new` function on this type. Its direct descendents have depth 322 /// `1`, and their descendents have depth `2`, and so on. 323 /// 324 /// Note that this will not simply filter the entries of the iterator, but 325 /// it will actually avoid descending into directories when the depth is 326 /// exceeded. max_depth(mut self, depth: usize) -> Self327 pub fn max_depth(mut self, depth: usize) -> Self { 328 self.opts.max_depth = depth; 329 if self.opts.max_depth < self.opts.min_depth { 330 self.opts.max_depth = self.opts.min_depth; 331 } 332 self 333 } 334 335 /// Follow symbolic links. By default, this is disabled. 336 /// 337 /// When `yes` is `true`, symbolic links are followed as if they were 338 /// normal directories and files. If a symbolic link is broken or is 339 /// involved in a loop, an error is yielded. 340 /// 341 /// When enabled, the yielded [`DirEntry`] values represent the target of 342 /// the link while the path corresponds to the link. See the [`DirEntry`] 343 /// type for more details. 344 /// 345 /// [`DirEntry`]: struct.DirEntry.html follow_links(mut self, yes: bool) -> Self346 pub fn follow_links(mut self, yes: bool) -> Self { 347 self.opts.follow_links = yes; 348 self 349 } 350 351 /// Follow symbolic links if these are the root of the traversal. 352 /// By default, this is enabled. 353 /// 354 /// When `yes` is `true`, symbolic links on root paths are followed 355 /// which is effective if the symbolic link points to a directory. 356 /// If a symbolic link is broken or is involved in a loop, an error is yielded 357 /// as the first entry of the traversal. 358 /// 359 /// When enabled, the yielded [`DirEntry`] values represent the target of 360 /// the link while the path corresponds to the link. See the [`DirEntry`] 361 /// type for more details, and all future entries will be contained within 362 /// the resolved directory behind the symbolic link of the root path. 363 /// 364 /// [`DirEntry`]: struct.DirEntry.html follow_root_links(mut self, yes: bool) -> Self365 pub fn follow_root_links(mut self, yes: bool) -> Self { 366 self.opts.follow_root_links = yes; 367 self 368 } 369 370 /// Set the maximum number of simultaneously open file descriptors used 371 /// by the iterator. 372 /// 373 /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set 374 /// to `1` automatically. If this is not set, then it defaults to some 375 /// reasonably low number. 376 /// 377 /// This setting has no impact on the results yielded by the iterator 378 /// (even when `n` is `1`). Instead, this setting represents a trade off 379 /// between scarce resources (file descriptors) and memory. Namely, when 380 /// the maximum number of file descriptors is reached and a new directory 381 /// needs to be opened to continue iteration, then a previous directory 382 /// handle is closed and has its unyielded entries stored in memory. In 383 /// practice, this is a satisfying trade off because it scales with respect 384 /// to the *depth* of your file tree. Therefore, low values (even `1`) are 385 /// acceptable. 386 /// 387 /// Note that this value does not impact the number of system calls made by 388 /// an exhausted iterator. 389 /// 390 /// # Platform behavior 391 /// 392 /// On Windows, if `follow_links` is enabled, then this limit is not 393 /// respected. In particular, the maximum number of file descriptors opened 394 /// is proportional to the depth of the directory tree traversed. max_open(mut self, mut n: usize) -> Self395 pub fn max_open(mut self, mut n: usize) -> Self { 396 if n == 0 { 397 n = 1; 398 } 399 self.opts.max_open = n; 400 self 401 } 402 403 /// Set a function for sorting directory entries with a comparator 404 /// function. 405 /// 406 /// If a compare function is set, the resulting iterator will return all 407 /// paths in sorted order. The compare function will be called to compare 408 /// entries from the same directory. 409 /// 410 /// ```rust,no_run 411 /// use std::cmp; 412 /// use std::ffi::OsString; 413 /// use walkdir::WalkDir; 414 /// 415 /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name())); 416 /// ``` sort_by<F>(mut self, cmp: F) -> Self where F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,417 pub fn sort_by<F>(mut self, cmp: F) -> Self 418 where 419 F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static, 420 { 421 self.opts.sorter = Some(Box::new(cmp)); 422 self 423 } 424 425 /// Set a function for sorting directory entries with a key extraction 426 /// function. 427 /// 428 /// If a compare function is set, the resulting iterator will return all 429 /// paths in sorted order. The compare function will be called to compare 430 /// entries from the same directory. 431 /// 432 /// ```rust,no_run 433 /// use std::cmp; 434 /// use std::ffi::OsString; 435 /// use walkdir::WalkDir; 436 /// 437 /// WalkDir::new("foo").sort_by_key(|a| a.file_name().to_owned()); 438 /// ``` sort_by_key<K, F>(self, mut cmp: F) -> Self where F: FnMut(&DirEntry) -> K + Send + Sync + 'static, K: Ord,439 pub fn sort_by_key<K, F>(self, mut cmp: F) -> Self 440 where 441 F: FnMut(&DirEntry) -> K + Send + Sync + 'static, 442 K: Ord, 443 { 444 self.sort_by(move |a, b| cmp(a).cmp(&cmp(b))) 445 } 446 447 /// Sort directory entries by file name, to ensure a deterministic order. 448 /// 449 /// This is a convenience function for calling `Self::sort_by()`. 450 /// 451 /// ```rust,no_run 452 /// use walkdir::WalkDir; 453 /// 454 /// WalkDir::new("foo").sort_by_file_name(); 455 /// ``` sort_by_file_name(self) -> Self456 pub fn sort_by_file_name(self) -> Self { 457 self.sort_by(|a, b| a.file_name().cmp(b.file_name())) 458 } 459 460 /// Yield a directory's contents before the directory itself. By default, 461 /// this is disabled. 462 /// 463 /// When `yes` is `false` (as is the default), the directory is yielded 464 /// before its contents are read. This is useful when, e.g. you want to 465 /// skip processing of some directories. 466 /// 467 /// When `yes` is `true`, the iterator yields the contents of a directory 468 /// before yielding the directory itself. This is useful when, e.g. you 469 /// want to recursively delete a directory. 470 /// 471 /// # Example 472 /// 473 /// Assume the following directory tree: 474 /// 475 /// ```text 476 /// foo/ 477 /// abc/ 478 /// qrs 479 /// tuv 480 /// def/ 481 /// ``` 482 /// 483 /// With contents_first disabled (the default), the following code visits 484 /// the directory tree in depth-first order: 485 /// 486 /// ```no_run 487 /// use walkdir::WalkDir; 488 /// 489 /// for entry in WalkDir::new("foo") { 490 /// let entry = entry.unwrap(); 491 /// println!("{}", entry.path().display()); 492 /// } 493 /// 494 /// // foo 495 /// // foo/abc 496 /// // foo/abc/qrs 497 /// // foo/abc/tuv 498 /// // foo/def 499 /// ``` 500 /// 501 /// With contents_first enabled: 502 /// 503 /// ```no_run 504 /// use walkdir::WalkDir; 505 /// 506 /// for entry in WalkDir::new("foo").contents_first(true) { 507 /// let entry = entry.unwrap(); 508 /// println!("{}", entry.path().display()); 509 /// } 510 /// 511 /// // foo/abc/qrs 512 /// // foo/abc/tuv 513 /// // foo/abc 514 /// // foo/def 515 /// // foo 516 /// ``` contents_first(mut self, yes: bool) -> Self517 pub fn contents_first(mut self, yes: bool) -> Self { 518 self.opts.contents_first = yes; 519 self 520 } 521 522 /// Do not cross file system boundaries. 523 /// 524 /// When this option is enabled, directory traversal will not descend into 525 /// directories that are on a different file system from the root path. 526 /// 527 /// Currently, this option is only supported on Unix and Windows. If this 528 /// option is used on an unsupported platform, then directory traversal 529 /// will immediately return an error and will not yield any entries. same_file_system(mut self, yes: bool) -> Self530 pub fn same_file_system(mut self, yes: bool) -> Self { 531 self.opts.same_file_system = yes; 532 self 533 } 534 } 535 536 impl IntoIterator for WalkDir { 537 type Item = Result<DirEntry>; 538 type IntoIter = IntoIter; 539 into_iter(self) -> IntoIter540 fn into_iter(self) -> IntoIter { 541 IntoIter { 542 opts: self.opts, 543 start: Some(self.root), 544 stack_list: vec![], 545 stack_path: vec![], 546 oldest_opened: 0, 547 depth: 0, 548 deferred_dirs: vec![], 549 root_device: None, 550 } 551 } 552 } 553 554 /// An iterator for recursively descending into a directory. 555 /// 556 /// A value with this type must be constructed with the [`WalkDir`] type, which 557 /// uses a builder pattern to set options such as min/max depth, max open file 558 /// descriptors and whether the iterator should follow symbolic links. After 559 /// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain. 560 /// 561 /// The order of elements yielded by this iterator is unspecified. 562 /// 563 /// [`WalkDir`]: struct.WalkDir.html 564 /// [`.into_iter()`]: struct.WalkDir.html#into_iter.v 565 #[derive(Debug)] 566 pub struct IntoIter { 567 /// Options specified in the builder. Depths, max fds, etc. 568 opts: WalkDirOptions, 569 /// The start path. 570 /// 571 /// This is only `Some(...)` at the beginning. After the first iteration, 572 /// this is always `None`. 573 start: Option<PathBuf>, 574 /// A stack of open (up to max fd) or closed handles to directories. 575 /// An open handle is a plain [`fs::ReadDir`] while a closed handle is 576 /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet consumed entries. 577 /// 578 /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html 579 stack_list: Vec<DirList>, 580 /// A stack of file paths. 581 /// 582 /// This is *only* used when [`follow_links`] is enabled. In all other 583 /// cases this stack is empty. 584 /// 585 /// [`follow_links`]: struct.WalkDir.html#method.follow_links 586 stack_path: Vec<Ancestor>, 587 /// An index into `stack_list` that points to the oldest open directory 588 /// handle. If the maximum fd limit is reached and a new directory needs to 589 /// be read, the handle at this index is closed before the new directory is 590 /// opened. 591 oldest_opened: usize, 592 /// The current depth of iteration (the length of the stack at the 593 /// beginning of each iteration). 594 depth: usize, 595 /// A list of DirEntries corresponding to directories, that are 596 /// yielded after their contents has been fully yielded. This is only 597 /// used when `contents_first` is enabled. 598 deferred_dirs: Vec<DirEntry>, 599 /// The device of the root file path when the first call to `next` was 600 /// made. 601 /// 602 /// If the `same_file_system` option isn't enabled, then this is always 603 /// `None`. Conversely, if it is enabled, this is always `Some(...)` after 604 /// handling the root path. 605 root_device: Option<u64>, 606 } 607 608 /// An ancestor is an item in the directory tree traversed by walkdir, and is 609 /// used to check for loops in the tree when traversing symlinks. 610 #[derive(Debug)] 611 struct Ancestor { 612 /// The path of this ancestor. 613 path: PathBuf, 614 /// An open file to this ancesor. This is only used on Windows where 615 /// opening a file handle appears to be quite expensive, so we choose to 616 /// cache it. This comes at the cost of not respecting the file descriptor 617 /// limit set by the user. 618 #[cfg(windows)] 619 handle: Handle, 620 } 621 622 impl Ancestor { 623 /// Create a new ancestor from the given directory path. 624 #[cfg(windows)] new(dent: &DirEntry) -> io::Result<Ancestor>625 fn new(dent: &DirEntry) -> io::Result<Ancestor> { 626 let handle = Handle::from_path(dent.path())?; 627 Ok(Ancestor { path: dent.path().to_path_buf(), handle }) 628 } 629 630 /// Create a new ancestor from the given directory path. 631 #[cfg(not(windows))] new(dent: &DirEntry) -> io::Result<Ancestor>632 fn new(dent: &DirEntry) -> io::Result<Ancestor> { 633 Ok(Ancestor { path: dent.path().to_path_buf() }) 634 } 635 636 /// Returns true if and only if the given open file handle corresponds to 637 /// the same directory as this ancestor. 638 #[cfg(windows)] is_same(&self, child: &Handle) -> io::Result<bool>639 fn is_same(&self, child: &Handle) -> io::Result<bool> { 640 Ok(child == &self.handle) 641 } 642 643 /// Returns true if and only if the given open file handle corresponds to 644 /// the same directory as this ancestor. 645 #[cfg(not(windows))] is_same(&self, child: &Handle) -> io::Result<bool>646 fn is_same(&self, child: &Handle) -> io::Result<bool> { 647 Ok(child == &Handle::from_path(&self.path)?) 648 } 649 } 650 651 /// A sequence of unconsumed directory entries. 652 /// 653 /// This represents the opened or closed state of a directory handle. When 654 /// open, future entries are read by iterating over the raw `fs::ReadDir`. 655 /// When closed, all future entries are read into memory. Iteration then 656 /// proceeds over a [`Vec<fs::DirEntry>`]. 657 /// 658 /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html 659 /// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html 660 #[derive(Debug)] 661 enum DirList { 662 /// An opened handle. 663 /// 664 /// This includes the depth of the handle itself. 665 /// 666 /// If there was an error with the initial [`fs::read_dir`] call, then it 667 /// is stored here. (We use an [`Option<...>`] to make yielding the error 668 /// exactly once simpler.) 669 /// 670 /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html 671 /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html 672 Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> }, 673 /// A closed handle. 674 /// 675 /// All remaining directory entries are read into memory. 676 Closed(vec::IntoIter<Result<DirEntry>>), 677 } 678 679 impl Iterator for IntoIter { 680 type Item = Result<DirEntry>; 681 /// Advances the iterator and returns the next value. 682 /// 683 /// # Errors 684 /// 685 /// If the iterator fails to retrieve the next value, this method returns 686 /// an error value. The error will be wrapped in an Option::Some. next(&mut self) -> Option<Result<DirEntry>>687 fn next(&mut self) -> Option<Result<DirEntry>> { 688 if let Some(start) = self.start.take() { 689 if self.opts.same_file_system { 690 let result = util::device_num(&start) 691 .map_err(|e| Error::from_path(0, start.clone(), e)); 692 self.root_device = Some(itry!(result)); 693 } 694 let dent = itry!(DirEntry::from_path(0, start, false)); 695 if let Some(result) = self.handle_entry(dent) { 696 return Some(result); 697 } 698 } 699 while !self.stack_list.is_empty() { 700 self.depth = self.stack_list.len(); 701 if let Some(dentry) = self.get_deferred_dir() { 702 return Some(Ok(dentry)); 703 } 704 if self.depth > self.opts.max_depth { 705 // If we've exceeded the max depth, pop the current dir 706 // so that we don't descend. 707 self.pop(); 708 continue; 709 } 710 // Unwrap is safe here because we've verified above that 711 // `self.stack_list` is not empty 712 let next = self 713 .stack_list 714 .last_mut() 715 .expect("BUG: stack should be non-empty") 716 .next(); 717 match next { 718 None => self.pop(), 719 Some(Err(err)) => return Some(Err(err)), 720 Some(Ok(dent)) => { 721 if let Some(result) = self.handle_entry(dent) { 722 return Some(result); 723 } 724 } 725 } 726 } 727 if self.opts.contents_first { 728 self.depth = self.stack_list.len(); 729 if let Some(dentry) = self.get_deferred_dir() { 730 return Some(Ok(dentry)); 731 } 732 } 733 None 734 } 735 } 736 737 impl IntoIter { 738 /// Skips the current directory. 739 /// 740 /// This causes the iterator to stop traversing the contents of the least 741 /// recently yielded directory. This means any remaining entries in that 742 /// directory will be skipped (including sub-directories). 743 /// 744 /// Note that the ergonomics of this method are questionable since it 745 /// borrows the iterator mutably. Namely, you must write out the looping 746 /// condition manually. For example, to skip hidden entries efficiently on 747 /// unix systems: 748 /// 749 /// ```no_run 750 /// use walkdir::{DirEntry, WalkDir}; 751 /// 752 /// fn is_hidden(entry: &DirEntry) -> bool { 753 /// entry.file_name() 754 /// .to_str() 755 /// .map(|s| s.starts_with(".")) 756 /// .unwrap_or(false) 757 /// } 758 /// 759 /// let mut it = WalkDir::new("foo").into_iter(); 760 /// loop { 761 /// let entry = match it.next() { 762 /// None => break, 763 /// Some(Err(err)) => panic!("ERROR: {}", err), 764 /// Some(Ok(entry)) => entry, 765 /// }; 766 /// if is_hidden(&entry) { 767 /// if entry.file_type().is_dir() { 768 /// it.skip_current_dir(); 769 /// } 770 /// continue; 771 /// } 772 /// println!("{}", entry.path().display()); 773 /// } 774 /// ``` 775 /// 776 /// You may find it more convenient to use the [`filter_entry`] iterator 777 /// adapter. (See its documentation for the same example functionality as 778 /// above.) 779 /// 780 /// [`filter_entry`]: #method.filter_entry skip_current_dir(&mut self)781 pub fn skip_current_dir(&mut self) { 782 if !self.stack_list.is_empty() { 783 self.pop(); 784 } 785 } 786 787 /// Yields only entries which satisfy the given predicate and skips 788 /// descending into directories that do not satisfy the given predicate. 789 /// 790 /// The predicate is applied to all entries. If the predicate is 791 /// true, iteration carries on as normal. If the predicate is false, the 792 /// entry is ignored and if it is a directory, it is not descended into. 793 /// 794 /// This is often more convenient to use than [`skip_current_dir`]. For 795 /// example, to skip hidden files and directories efficiently on unix 796 /// systems: 797 /// 798 /// ```no_run 799 /// use walkdir::{DirEntry, WalkDir}; 800 /// # use walkdir::Error; 801 /// 802 /// fn is_hidden(entry: &DirEntry) -> bool { 803 /// entry.file_name() 804 /// .to_str() 805 /// .map(|s| s.starts_with(".")) 806 /// .unwrap_or(false) 807 /// } 808 /// 809 /// # fn try_main() -> Result<(), Error> { 810 /// for entry in WalkDir::new("foo") 811 /// .into_iter() 812 /// .filter_entry(|e| !is_hidden(e)) { 813 /// println!("{}", entry?.path().display()); 814 /// } 815 /// # Ok(()) 816 /// # } 817 /// ``` 818 /// 819 /// Note that the iterator will still yield errors for reading entries that 820 /// may not satisfy the predicate. 821 /// 822 /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not 823 /// passed to this predicate. 824 /// 825 /// Note that if the iterator has `contents_first` enabled, then this 826 /// method is no different than calling the standard `Iterator::filter` 827 /// method (because directory entries are yielded after they've been 828 /// descended into). 829 /// 830 /// [`skip_current_dir`]: #method.skip_current_dir 831 /// [`min_depth`]: struct.WalkDir.html#method.min_depth 832 /// [`max_depth`]: struct.WalkDir.html#method.max_depth filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P> where P: FnMut(&DirEntry) -> bool,833 pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P> 834 where 835 P: FnMut(&DirEntry) -> bool, 836 { 837 FilterEntry { it: self, predicate } 838 } 839 handle_entry( &mut self, mut dent: DirEntry, ) -> Option<Result<DirEntry>>840 fn handle_entry( 841 &mut self, 842 mut dent: DirEntry, 843 ) -> Option<Result<DirEntry>> { 844 if self.opts.follow_links && dent.file_type().is_symlink() { 845 dent = itry!(self.follow(dent)); 846 } 847 let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir(); 848 if is_normal_dir { 849 if self.opts.same_file_system && dent.depth() > 0 { 850 if itry!(self.is_same_file_system(&dent)) { 851 itry!(self.push(&dent)); 852 } 853 } else { 854 itry!(self.push(&dent)); 855 } 856 } else if dent.depth() == 0 857 && dent.file_type().is_symlink() 858 && self.opts.follow_root_links 859 { 860 // As a special case, if we are processing a root entry, then we 861 // always follow it even if it's a symlink and follow_links is 862 // false. We are careful to not let this change the semantics of 863 // the DirEntry however. Namely, the DirEntry should still respect 864 // the follow_links setting. When it's disabled, it should report 865 // itself as a symlink. When it's enabled, it should always report 866 // itself as the target. 867 let md = itry!(fs::metadata(dent.path()).map_err(|err| { 868 Error::from_path(dent.depth(), dent.path().to_path_buf(), err) 869 })); 870 if md.file_type().is_dir() { 871 itry!(self.push(&dent)); 872 } 873 } 874 if is_normal_dir && self.opts.contents_first { 875 self.deferred_dirs.push(dent); 876 None 877 } else if self.skippable() { 878 None 879 } else { 880 Some(Ok(dent)) 881 } 882 } 883 get_deferred_dir(&mut self) -> Option<DirEntry>884 fn get_deferred_dir(&mut self) -> Option<DirEntry> { 885 if self.opts.contents_first { 886 if self.depth < self.deferred_dirs.len() { 887 // Unwrap is safe here because we've guaranteed that 888 // `self.deferred_dirs.len()` can never be less than 1 889 let deferred: DirEntry = self 890 .deferred_dirs 891 .pop() 892 .expect("BUG: deferred_dirs should be non-empty"); 893 if !self.skippable() { 894 return Some(deferred); 895 } 896 } 897 } 898 None 899 } 900 push(&mut self, dent: &DirEntry) -> Result<()>901 fn push(&mut self, dent: &DirEntry) -> Result<()> { 902 // Make room for another open file descriptor if we've hit the max. 903 let free = 904 self.stack_list.len().checked_sub(self.oldest_opened).unwrap(); 905 if free == self.opts.max_open { 906 self.stack_list[self.oldest_opened].close(); 907 } 908 // Open a handle to reading the directory's entries. 909 let rd = fs::read_dir(dent.path()).map_err(|err| { 910 Some(Error::from_path(self.depth, dent.path().to_path_buf(), err)) 911 }); 912 let mut list = DirList::Opened { depth: self.depth, it: rd }; 913 if let Some(ref mut cmp) = self.opts.sorter { 914 let mut entries: Vec<_> = list.collect(); 915 entries.sort_by(|a, b| match (a, b) { 916 (&Ok(ref a), &Ok(ref b)) => cmp(a, b), 917 (&Err(_), &Err(_)) => Ordering::Equal, 918 (&Ok(_), &Err(_)) => Ordering::Greater, 919 (&Err(_), &Ok(_)) => Ordering::Less, 920 }); 921 list = DirList::Closed(entries.into_iter()); 922 } 923 if self.opts.follow_links { 924 let ancestor = Ancestor::new(&dent) 925 .map_err(|err| Error::from_io(self.depth, err))?; 926 self.stack_path.push(ancestor); 927 } 928 // We push this after stack_path since creating the Ancestor can fail. 929 // If it fails, then we return the error and won't descend. 930 self.stack_list.push(list); 931 // If we had to close out a previous directory stream, then we need to 932 // increment our index the oldest still-open stream. We do this only 933 // after adding to our stack, in order to ensure that the oldest_opened 934 // index remains valid. The worst that can happen is that an already 935 // closed stream will be closed again, which is a no-op. 936 // 937 // We could move the close of the stream above into this if-body, but 938 // then we would have more than the maximum number of file descriptors 939 // open at a particular point in time. 940 if free == self.opts.max_open { 941 // Unwrap is safe here because self.oldest_opened is guaranteed to 942 // never be greater than `self.stack_list.len()`, which implies 943 // that the subtraction won't underflow and that adding 1 will 944 // never overflow. 945 self.oldest_opened = self.oldest_opened.checked_add(1).unwrap(); 946 } 947 Ok(()) 948 } 949 pop(&mut self)950 fn pop(&mut self) { 951 self.stack_list.pop().expect("BUG: cannot pop from empty stack"); 952 if self.opts.follow_links { 953 self.stack_path.pop().expect("BUG: list/path stacks out of sync"); 954 } 955 // If everything in the stack is already closed, then there is 956 // room for at least one more open descriptor and it will 957 // always be at the top of the stack. 958 self.oldest_opened = min(self.oldest_opened, self.stack_list.len()); 959 } 960 follow(&self, mut dent: DirEntry) -> Result<DirEntry>961 fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> { 962 dent = 963 DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?; 964 // The only way a symlink can cause a loop is if it points 965 // to a directory. Otherwise, it always points to a leaf 966 // and we can omit any loop checks. 967 if dent.is_dir() { 968 self.check_loop(dent.path())?; 969 } 970 Ok(dent) 971 } 972 check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()>973 fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> { 974 let hchild = Handle::from_path(&child) 975 .map_err(|err| Error::from_io(self.depth, err))?; 976 for ancestor in self.stack_path.iter().rev() { 977 let is_same = ancestor 978 .is_same(&hchild) 979 .map_err(|err| Error::from_io(self.depth, err))?; 980 if is_same { 981 return Err(Error::from_loop( 982 self.depth, 983 &ancestor.path, 984 child.as_ref(), 985 )); 986 } 987 } 988 Ok(()) 989 } 990 is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool>991 fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> { 992 let dent_device = util::device_num(dent.path()) 993 .map_err(|err| Error::from_entry(dent, err))?; 994 Ok(self 995 .root_device 996 .map(|d| d == dent_device) 997 .expect("BUG: called is_same_file_system without root device")) 998 } 999 skippable(&self) -> bool1000 fn skippable(&self) -> bool { 1001 self.depth < self.opts.min_depth || self.depth > self.opts.max_depth 1002 } 1003 } 1004 1005 impl iter::FusedIterator for IntoIter {} 1006 1007 impl DirList { close(&mut self)1008 fn close(&mut self) { 1009 if let DirList::Opened { .. } = *self { 1010 *self = DirList::Closed(self.collect::<Vec<_>>().into_iter()); 1011 } 1012 } 1013 } 1014 1015 impl Iterator for DirList { 1016 type Item = Result<DirEntry>; 1017 1018 #[inline(always)] next(&mut self) -> Option<Result<DirEntry>>1019 fn next(&mut self) -> Option<Result<DirEntry>> { 1020 match *self { 1021 DirList::Closed(ref mut it) => it.next(), 1022 DirList::Opened { depth, ref mut it } => match *it { 1023 Err(ref mut err) => err.take().map(Err), 1024 Ok(ref mut rd) => rd.next().map(|r| match r { 1025 Ok(r) => DirEntry::from_entry(depth + 1, &r), 1026 Err(err) => Err(Error::from_io(depth + 1, err)), 1027 }), 1028 }, 1029 } 1030 } 1031 } 1032 1033 /// A recursive directory iterator that skips entries. 1034 /// 1035 /// Values of this type are created by calling [`.filter_entry()`] on an 1036 /// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`. 1037 /// 1038 /// Directories that fail the predicate `P` are skipped. Namely, they are 1039 /// never yielded and never descended into. 1040 /// 1041 /// Entries that are skipped with the [`min_depth`] and [`max_depth`] options 1042 /// are not passed through this filter. 1043 /// 1044 /// If opening a handle to a directory resulted in an error, then it is yielded 1045 /// and no corresponding call to the predicate is made. 1046 /// 1047 /// Type parameter `I` refers to the underlying iterator and `P` refers to the 1048 /// predicate, which is usually `FnMut(&DirEntry) -> bool`. 1049 /// 1050 /// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry 1051 /// [`.into_iter()`]: struct.WalkDir.html#into_iter.v 1052 /// [`min_depth`]: struct.WalkDir.html#method.min_depth 1053 /// [`max_depth`]: struct.WalkDir.html#method.max_depth 1054 #[derive(Debug)] 1055 pub struct FilterEntry<I, P> { 1056 it: I, 1057 predicate: P, 1058 } 1059 1060 impl<P> Iterator for FilterEntry<IntoIter, P> 1061 where 1062 P: FnMut(&DirEntry) -> bool, 1063 { 1064 type Item = Result<DirEntry>; 1065 1066 /// Advances the iterator and returns the next value. 1067 /// 1068 /// # Errors 1069 /// 1070 /// If the iterator fails to retrieve the next value, this method returns 1071 /// an error value. The error will be wrapped in an `Option::Some`. next(&mut self) -> Option<Result<DirEntry>>1072 fn next(&mut self) -> Option<Result<DirEntry>> { 1073 loop { 1074 let dent = match self.it.next() { 1075 None => return None, 1076 Some(result) => itry!(result), 1077 }; 1078 if !(self.predicate)(&dent) { 1079 if dent.is_dir() { 1080 self.it.skip_current_dir(); 1081 } 1082 continue; 1083 } 1084 return Some(Ok(dent)); 1085 } 1086 } 1087 } 1088 1089 impl<P> iter::FusedIterator for FilterEntry<IntoIter, P> where 1090 P: FnMut(&DirEntry) -> bool 1091 { 1092 } 1093 1094 impl<P> FilterEntry<IntoIter, P> 1095 where 1096 P: FnMut(&DirEntry) -> bool, 1097 { 1098 /// Yields only entries which satisfy the given predicate and skips 1099 /// descending into directories that do not satisfy the given predicate. 1100 /// 1101 /// The predicate is applied to all entries. If the predicate is 1102 /// true, iteration carries on as normal. If the predicate is false, the 1103 /// entry is ignored and if it is a directory, it is not descended into. 1104 /// 1105 /// This is often more convenient to use than [`skip_current_dir`]. For 1106 /// example, to skip hidden files and directories efficiently on unix 1107 /// systems: 1108 /// 1109 /// ```no_run 1110 /// use walkdir::{DirEntry, WalkDir}; 1111 /// # use walkdir::Error; 1112 /// 1113 /// fn is_hidden(entry: &DirEntry) -> bool { 1114 /// entry.file_name() 1115 /// .to_str() 1116 /// .map(|s| s.starts_with(".")) 1117 /// .unwrap_or(false) 1118 /// } 1119 /// 1120 /// # fn try_main() -> Result<(), Error> { 1121 /// for entry in WalkDir::new("foo") 1122 /// .into_iter() 1123 /// .filter_entry(|e| !is_hidden(e)) { 1124 /// println!("{}", entry?.path().display()); 1125 /// } 1126 /// # Ok(()) 1127 /// # } 1128 /// ``` 1129 /// 1130 /// Note that the iterator will still yield errors for reading entries that 1131 /// may not satisfy the predicate. 1132 /// 1133 /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not 1134 /// passed to this predicate. 1135 /// 1136 /// Note that if the iterator has `contents_first` enabled, then this 1137 /// method is no different than calling the standard `Iterator::filter` 1138 /// method (because directory entries are yielded after they've been 1139 /// descended into). 1140 /// 1141 /// [`skip_current_dir`]: #method.skip_current_dir 1142 /// [`min_depth`]: struct.WalkDir.html#method.min_depth 1143 /// [`max_depth`]: struct.WalkDir.html#method.max_depth filter_entry(self, predicate: P) -> FilterEntry<Self, P>1144 pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> { 1145 FilterEntry { it: self, predicate } 1146 } 1147 1148 /// Skips the current directory. 1149 /// 1150 /// This causes the iterator to stop traversing the contents of the least 1151 /// recently yielded directory. This means any remaining entries in that 1152 /// directory will be skipped (including sub-directories). 1153 /// 1154 /// Note that the ergonomics of this method are questionable since it 1155 /// borrows the iterator mutably. Namely, you must write out the looping 1156 /// condition manually. For example, to skip hidden entries efficiently on 1157 /// unix systems: 1158 /// 1159 /// ```no_run 1160 /// use walkdir::{DirEntry, WalkDir}; 1161 /// 1162 /// fn is_hidden(entry: &DirEntry) -> bool { 1163 /// entry.file_name() 1164 /// .to_str() 1165 /// .map(|s| s.starts_with(".")) 1166 /// .unwrap_or(false) 1167 /// } 1168 /// 1169 /// let mut it = WalkDir::new("foo").into_iter(); 1170 /// loop { 1171 /// let entry = match it.next() { 1172 /// None => break, 1173 /// Some(Err(err)) => panic!("ERROR: {}", err), 1174 /// Some(Ok(entry)) => entry, 1175 /// }; 1176 /// if is_hidden(&entry) { 1177 /// if entry.file_type().is_dir() { 1178 /// it.skip_current_dir(); 1179 /// } 1180 /// continue; 1181 /// } 1182 /// println!("{}", entry.path().display()); 1183 /// } 1184 /// ``` 1185 /// 1186 /// You may find it more convenient to use the [`filter_entry`] iterator 1187 /// adapter. (See its documentation for the same example functionality as 1188 /// above.) 1189 /// 1190 /// [`filter_entry`]: #method.filter_entry skip_current_dir(&mut self)1191 pub fn skip_current_dir(&mut self) { 1192 self.it.skip_current_dir(); 1193 } 1194 } 1195