use std::{
    fs::File,
    io::{self, BufRead, Seek},
    marker::PhantomData,
    path::Path,
    result,
};

use {
    csv_core::{Reader as CoreReader, ReaderBuilder as CoreReaderBuilder},
    serde::de::DeserializeOwned,
};

use crate::{
    byte_record::{ByteRecord, Position},
    error::{Error, ErrorKind, Result, Utf8Error},
    string_record::StringRecord,
    {Terminator, Trim},
};

/// Builds a CSV reader with various configuration knobs.
///
/// This builder can be used to tweak the field delimiter, record terminator
/// and more. Once a CSV `Reader` is built, its configuration cannot be
/// changed.
#[derive(Debug)]
pub struct ReaderBuilder {
    capacity: usize,
    flexible: bool,
    has_headers: bool,
    trim: Trim,
    /// The underlying CSV parser builder.
    ///
    /// We explicitly put this on the heap because CoreReaderBuilder embeds an
    /// entire DFA transition table, which along with other things, tallies up
    /// to almost 500 bytes on the stack.
    builder: Box<CoreReaderBuilder>,
}

impl Default for ReaderBuilder {
    fn default() -> ReaderBuilder {
        ReaderBuilder {
            capacity: 8 * (1 << 10),
            flexible: false,
            has_headers: true,
            trim: Trim::default(),
            builder: Box::new(CoreReaderBuilder::default()),
        }
    }
}

impl ReaderBuilder {
    /// Create a new builder for configuring CSV parsing.
    ///
    /// To convert a builder into a reader, call one of the methods starting
    /// with `from_`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
    ///
    ///     let records = rdr
    ///         .records()
    ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
    ///     assert_eq!(records, vec![
    ///         vec!["Boston", "United States", "4628910"],
    ///         vec!["Concord", "United States", "42695"],
    ///     ]);
    ///     Ok(())
    /// }
    /// ```
    pub fn new() -> ReaderBuilder {
        ReaderBuilder::default()
    }

    /// Build a CSV parser from this configuration that reads data from the
    /// given file path.
    ///
    /// If there was a problem opening the file at the given path, then this
    /// returns the corresponding error.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut rdr = ReaderBuilder::new().from_path("foo.csv")?;
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
        Ok(Reader::new(self, File::open(path)?))
    }

    /// Build a CSV parser from this configuration that reads data from `rdr`.
    ///
    /// Note that the CSV reader is buffered automatically, so you should not
    /// wrap `rdr` in a buffered reader like `io::BufReader`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_reader<R: io::Read>(&self, rdr: R) -> Reader<R> {
        Reader::new(self, rdr)
    }

    /// The field delimiter to use when parsing CSV.
    ///
    /// The default is `b','`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city;country;pop
    /// Boston;United States;4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .delimiter(b';')
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder {
        self.builder.delimiter(delimiter);
        self
    }

    /// Whether to treat the first row as a special header row.
    ///
    /// By default, the first row is treated as a special header row, which
    /// means the header is never returned by any of the record reading methods
    /// or iterators. When this is disabled (`yes` set to `false`), the first
    /// row is not treated specially.
    ///
    /// Note that the `headers` and `byte_headers` methods are unaffected by
    /// whether this is set. Those methods always return the first record.
    ///
    /// # Example
    ///
    /// This example shows what happens when `has_headers` is disabled.
    /// Namely, the first row is treated just like any other row.
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .has_headers(false)
    ///         .from_reader(data.as_bytes());
    ///     let mut iter = rdr.records();
    ///
    ///     // Read the first record.
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["city", "country", "pop"]);
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least two records but got none"));
    ///     }
    ///
    ///     // Read the second record.
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least two records but got one"))
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn has_headers(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.has_headers = yes;
        self
    }

    /// Whether the number of fields in records is allowed to change or not.
    ///
    /// When disabled (which is the default), parsing CSV data will return an
    /// error if a record is found with a number of fields different from the
    /// number of fields in a previous record.
    ///
    /// When enabled, this error checking is turned off.
    ///
    /// # Example: flexible records enabled
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     // Notice that the first row is missing the population count.
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .flexible(true)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    ///
    /// # Example: flexible records disabled
    ///
    /// This shows the error that appears when records of unequal length
    /// are found and flexible records have been disabled (which is the
    /// default).
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ErrorKind, ReaderBuilder};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     // Notice that the first row is missing the population count.
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .flexible(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(Err(err)) = rdr.records().next() {
    ///         match *err.kind() {
    ///             ErrorKind::UnequalLengths { expected_len, len, .. } => {
    ///                 // The header row has 3 fields...
    ///                 assert_eq!(expected_len, 3);
    ///                 // ... but the first row has only 2 fields.
    ///                 assert_eq!(len, 2);
    ///                 Ok(())
    ///             }
    ///             ref wrong => {
    ///                 Err(From::from(format!(
    ///                     "expected UnequalLengths error but got {:?}",
    ///                     wrong)))
    ///             }
    ///         }
    ///     } else {
    ///         Err(From::from(
    ///             "expected at least one errored record but got none"))
    ///     }
    /// }
    /// ```
    pub fn flexible(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.flexible = yes;
        self
    }

    /// Whether fields are trimmed of leading and trailing whitespace or not.
    ///
    /// By default, no trimming is performed. This method permits one to
    /// override that behavior and choose one of the following options:
    ///
    /// 1. `Trim::Headers` trims only header values.
    /// 2. `Trim::Fields` trims only non-header or "field" values.
    /// 3. `Trim::All` trims both header and non-header values.
    ///
    /// A value is only interpreted as a header value if this CSV reader is
    /// configured to read a header record (which is the default).
    ///
    /// When reading string records, characters meeting the definition of
    /// Unicode whitespace are trimmed. When reading byte records, characters
    /// meeting the definition of ASCII whitespace are trimmed. ASCII
    /// whitespace characters correspond to the set `[\t\n\v\f\r ]`.
    ///
    /// # Example
    ///
    /// This example shows what happens when all values are trimmed.
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, StringRecord, Trim};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city , country , pop
    /// Boston,\"
    /// United States\",4628910
    /// Concord, United States ,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .trim(Trim::All)
    ///         .from_reader(data.as_bytes());
    ///     let records = rdr
    ///         .records()
    ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
    ///     assert_eq!(records, vec![
    ///         vec!["Boston", "United States", "4628910"],
    ///         vec!["Concord", "United States", "42695"],
    ///     ]);
    ///     Ok(())
    /// }
    /// ```
    pub fn trim(&mut self, trim: Trim) -> &mut ReaderBuilder {
        self.trim = trim;
        self
    }

    /// The record terminator to use when parsing CSV.
    ///
    /// A record terminator can be any single byte. The default is a special
    /// value, `Terminator::CRLF`, which treats any occurrence of `\r`, `\n`
    /// or `\r\n` as a single record terminator.
    ///
    /// # Example: `$` as a record terminator
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, Terminator};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "city,country,pop$Boston,United States,4628910";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .terminator(Terminator::Any(b'$'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder {
        self.builder.terminator(term.to_core());
        self
    }

    /// The quote character to use when parsing CSV.
    ///
    /// The default is `b'"'`.
    ///
    /// # Example: single quotes instead of double quotes
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,'United States',4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .quote(b'\'')
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder {
        self.builder.quote(quote);
        self
    }

    /// The escape character to use when parsing CSV.
    ///
    /// In some variants of CSV, quotes are escaped using a special escape
    /// character like `\` (instead of escaping quotes by doubling them).
    ///
    /// By default, recognizing these idiosyncratic escapes is disabled.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The \\\"United\\\" States\",4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .escape(Some(b'\\'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "The \"United\" States", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder {
        self.builder.escape(escape);
        self
    }

    /// Enable double quote escapes.
    ///
    /// This is enabled by default, but it may be disabled. When disabled,
    /// doubled quotes are not interpreted as escapes.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The \"\"United\"\" States\",4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .double_quote(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "The \"United\"\" States\"", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.double_quote(yes);
        self
    }

    /// Enable or disable quoting.
    ///
    /// This is enabled by default, but it may be disabled. When disabled,
    /// quotes are not treated specially.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .quoting(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "\"The United States", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.quoting(yes);
        self
    }

    /// The comment character to use when parsing CSV.
    ///
    /// If the start of a record begins with the byte given here, then that
    /// line is ignored by the CSV parser.
    ///
    /// This is disabled by default.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// #Concord,United States,42695
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .comment(Some(b'#'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
        self.builder.comment(comment);
        self
    }

    /// A convenience method for specifying a configuration to read ASCII
    /// delimited text.
    ///
    /// This sets the delimiter and record terminator to the ASCII unit
    /// separator (`\x1F`) and record separator (`\x1E`), respectively.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city\x1Fcountry\x1Fpop\x1EBoston\x1FUnited States\x1F4628910";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .ascii()
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn ascii(&mut self) -> &mut ReaderBuilder {
        self.builder.ascii();
        self
    }

    /// Set the capacity (in bytes) of the buffer used in the CSV reader.
    /// This defaults to a reasonable setting.
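    ///
    /// # Example
    ///
    /// A minimal sketch of overriding the default buffer size; the 16 KiB
    /// value here is arbitrary, not a recommendation:
    ///
    /// ```
    /// use csv::ReaderBuilder;
    ///
    /// let data = "city,country,pop\nBoston,United States,4628910\n";
    /// let mut rdr = ReaderBuilder::new()
    ///     .buffer_capacity(16 * (1 << 10)) // 16 KiB read buffer
    ///     .from_reader(data.as_bytes());
    /// assert!(rdr.records().next().is_some());
    /// ```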
    pub fn buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder {
        self.capacity = capacity;
        self
    }

    /// Enable or disable the NFA for parsing CSV.
    ///
    /// This is intended to be a debug option. The NFA is always slower than
    /// the DFA.
    #[doc(hidden)]
    pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.nfa(yes);
        self
    }
}

/// An already configured CSV reader.
///
/// A CSV reader takes as input CSV data and transforms that into standard Rust
/// values. The most flexible way to read CSV data is as a sequence of records,
/// where a record is a sequence of fields and each field is a string. However,
/// a reader can also deserialize CSV data into Rust types like `i64` or
/// `(String, f64, f64, f64)` or even a custom struct automatically using
/// Serde.
///
/// # Configuration
///
/// A CSV reader has a couple of convenient constructor methods like
/// `from_path` and `from_reader`. However, if you want to configure the CSV
/// reader to use a different delimiter or quote character (among many other
/// things), then you should use a [`ReaderBuilder`](struct.ReaderBuilder.html)
/// to construct a `Reader`. For example, to change the field delimiter:
///
/// ```
/// use std::error::Error;
/// use csv::ReaderBuilder;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city;country;pop
/// Boston;United States;4628910
/// ";
///     let mut rdr = ReaderBuilder::new()
///         .delimiter(b';')
///         .from_reader(data.as_bytes());
///
///     if let Some(result) = rdr.records().next() {
///         let record = result?;
///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
///         Ok(())
///     } else {
///         Err(From::from("expected at least one record but got none"))
///     }
/// }
/// ```
///
/// # Error handling
///
/// In general, CSV *parsing* does not ever return an error. That is, there is
/// no such thing as malformed CSV data. Instead, this reader will prioritize
/// finding a parse over rejecting CSV data that it does not understand. This
/// choice was inspired by other popular CSV parsers, but it is also
/// pragmatic. CSV data varies wildly, so even if the CSV data is malformed,
/// it might still be possible to work with the data. In the land of CSV,
/// there is no "right" or "wrong," only "right" and "less right."
///
/// With that said, a number of errors can occur while reading CSV data:
///
/// * By default, all records in CSV data must have the same number of fields.
///   If a record is found with a different number of fields than a prior
///   record, then an error is returned. This behavior can be disabled by
///   enabling flexible parsing via the `flexible` method on
///   [`ReaderBuilder`](struct.ReaderBuilder.html).
/// * When reading CSV data from a resource (like a file), it is possible for
///   reading from the underlying resource to fail. This will return an error.
///   After such an error is encountered, subsequent calls to the `Reader`
///   (unless `seek` is used) will behave as if end of file had been reached,
///   in order to avoid running into an infinite loop by repeatedly trying to
///   read the next record when one has already errored.
/// * When reading CSV data into `String` or `&str` fields (e.g., via a
///   [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
///   enforced. If CSV data is invalid UTF-8, then an error is returned. If
///   you want to read invalid UTF-8, then you should use the byte oriented
///   APIs such as [`ByteRecord`](struct.ByteRecord.html). If you need explicit
///   support for another encoding entirely, then you'll need to use another
///   crate to transcode your CSV data to UTF-8 before parsing it.
/// * When using Serde to deserialize CSV data into Rust types, it is possible
///   for a number of additional errors to occur. For example, deserializing
///   a field `xyz` into an `i32` field will result in an error.
///
/// For more details on the precise semantics of errors, see the
/// [`Error`](enum.Error.html) type.
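///
/// As a rough sketch (not the only way to structure this), the different
/// error cases can be distinguished by matching on
/// [`ErrorKind`](enum.ErrorKind.html) while iterating:
///
/// ```
/// use csv::{ErrorKind, Reader};
///
/// // The second row has an extra field, which triggers an
/// // `UnequalLengths` error under the default (non-flexible) settings.
/// let data = "a,b\n1,2,3\n";
/// let mut rdr = Reader::from_reader(data.as_bytes());
/// for result in rdr.records() {
///     match result {
///         Ok(record) => println!("ok: {:?}", record),
///         Err(err) => match err.kind() {
///             ErrorKind::UnequalLengths { .. } => println!("ragged row"),
///             ErrorKind::Utf8 { .. } => println!("invalid UTF-8"),
///             ErrorKind::Io(_) => println!("I/O error"),
///             _ => println!("other error: {}", err),
///         },
///     }
/// }
/// ```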
#[derive(Debug)]
pub struct Reader<R> {
    /// The underlying CSV parser.
    ///
    /// We explicitly put this on the heap because CoreReader embeds an entire
    /// DFA transition table, which along with other things, tallies up to
    /// almost 500 bytes on the stack.
    core: Box<CoreReader>,
    /// The underlying reader.
    rdr: io::BufReader<R>,
    /// Various state tracking.
    ///
    /// There is more state embedded in the `CoreReader`.
    state: ReaderState,
}

#[derive(Debug)]
struct ReaderState {
    /// When set, this contains the first row of any parsed CSV data.
    ///
    /// This is always populated, regardless of whether `has_headers` is set.
    headers: Option<Headers>,
    /// When set, the first row of parsed CSV data is excluded from things
    /// that read records, like iterators and `read_record`.
    has_headers: bool,
    /// When set, there is no restriction on the length of records. When not
    /// set, every record must have the same number of fields, or else an
    /// error is reported.
    flexible: bool,
    trim: Trim,
    /// The number of fields in the first record parsed.
    first_field_count: Option<u64>,
    /// The current position of the parser.
    ///
    /// Note that this position is only observable by callers at the start
    /// of a record. More granular positions are not supported.
    cur_pos: Position,
    /// Whether the first record has been read or not.
    first: bool,
    /// Whether the reader has been seeked or not.
    seeked: bool,
    /// Whether EOF of the underlying reader has been reached or not.
    ///
    /// IO errors on the underlying reader will be considered as an EOF for
    /// subsequent read attempts, as it would be incorrect to keep on trying
    /// to read when the underlying reader has broken.
    ///
    /// For clarity, to get the best `Debug` impl, and in case they need to be
    /// treated differently at some point, we store whether the `EOF` state is
    /// due to an actual EOF or to an IO error.
    /// This has no additional runtime cost.
    eof: ReaderEofState,
}

/// Whether EOF of the underlying reader has been reached or not.
///
/// IO errors on the underlying reader will be considered as an EOF for
/// subsequent read attempts, as it would be incorrect to keep on trying
/// to read when the underlying reader has broken.
///
/// For clarity, to get the best `Debug` impl, and in case they need to be
/// treated differently at some point, we store whether the `EOF` state is
/// due to an actual EOF or to an IO error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReaderEofState {
    NotEof,
    Eof,
    IOError,
}

/// Headers encapsulates any data associated with the headers of CSV data.
///
/// The headers always correspond to the first row.
#[derive(Debug)]
struct Headers {
    /// The header, as raw bytes.
    byte_record: ByteRecord,
    /// The header, as valid UTF-8 (or a UTF-8 error).
    string_record: result::Result<StringRecord, Utf8Error>,
}

impl Reader<File> {
    /// Create a new CSV parser with a default configuration for the given
    /// file path.
    ///
    /// To customize CSV parsing, use a `ReaderBuilder`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut rdr = Reader::from_path("foo.csv")?;
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
        ReaderBuilder::new().from_path(path)
    }
}

impl<R: io::Read> Reader<R> {
    /// Create a new CSV reader given a builder and a source of underlying
    /// bytes.
    fn new(builder: &ReaderBuilder, rdr: R) -> Reader<R> {
        Reader {
            core: Box::new(builder.builder.build()),
            rdr: io::BufReader::with_capacity(builder.capacity, rdr),
            state: ReaderState {
                headers: None,
                has_headers: builder.has_headers,
                flexible: builder.flexible,
                trim: builder.trim,
                first_field_count: None,
                cur_pos: Position::new(),
                first: false,
                seeked: false,
                eof: ReaderEofState::NotEof,
            },
        }
    }

    /// Create a new CSV parser with a default configuration for the given
    /// reader.
    ///
    /// To customize CSV parsing, use a `ReaderBuilder`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_reader(rdr: R) -> Reader<R> {
        ReaderBuilder::new().from_reader(rdr)
    }

    /// Returns a borrowed iterator over deserialized records.
    ///
    /// Each item yielded by this iterator is a `Result<D, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record. Additionally,
    /// if `has_headers` is enabled, then deserializing into a struct will
    /// automatically align the values in each row to the fields of a struct
    /// based on the header row.
    ///
    /// # Example
    ///
    /// This shows how to deserialize CSV data into normal Rust structs. The
    /// fields of the header row are used to match up the values in each row
    /// to the fields of the struct.
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
    /// struct Row {
    ///     city: String,
    ///     country: String,
    ///     #[serde(rename = "popcount")]
    ///     population: u64,
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = csv::Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.deserialize();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             city: "Boston".to_string(),
    ///             country: "United States".to_string(),
    ///             population: 4628910,
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    ///
    /// # Rules
    ///
    /// For the most part, any Rust type that maps straight-forwardly to a CSV
    /// record is supported. This includes maps, structs, tuples and tuple
    /// structs. Other Rust types, such as `Vec`s, arrays, and enums have
    /// a more complicated story. In general, when working with CSV data, one
    /// should avoid *nested sequences* as much as possible.
    ///
    /// Maps, structs, tuples and tuple structs map to CSV records in a simple
    /// way. Tuples and tuple structs decode their fields in the order that
    /// they are defined. Structs will do the same only if `has_headers` has
    /// been disabled using [`ReaderBuilder`](struct.ReaderBuilder.html);
    /// otherwise, structs and maps are deserialized based on the fields
    /// defined in the header row. (If there is no header row, then
    /// deserializing into a map will result in an error.)
    ///
    /// Nested sequences are supported in a limited capacity. Namely, they
    /// are flattened. As a result, it's often useful to use a `Vec` to capture
    /// a "tail" of fields in a record:
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
    /// struct Row {
    ///     label: String,
    ///     values: Vec<i32>,
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "foo,1,2,3";
    ///     let mut rdr = csv::ReaderBuilder::new()
    ///         .has_headers(false)
    ///         .from_reader(data.as_bytes());
    ///     let mut iter = rdr.deserialize();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             label: "foo".to_string(),
    ///             values: vec![1, 2, 3],
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    ///
    /// In the above example, adding another field to the `Row` struct after
    /// the `values` field will result in a deserialization error. This is
    /// because the deserializer doesn't know when to stop reading fields
    /// into the `values` vector, so it will consume the rest of the fields in
    /// the record, leaving none left over for the additional field.
    ///
    /// Finally, simple enums in Rust can be deserialized as well. Namely,
    /// enums must either be variants with no arguments or variants with a
    /// single argument. Variants with no arguments are deserialized based on
    /// which variant name the field matches. Variants with a single argument
    /// are deserialized based on which variant can store the data. The latter
    /// is only supported when using "untagged" enum deserialization. The
    /// following example shows both forms in action:
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// #[derive(Debug, serde::Deserialize, PartialEq)]
    /// struct Row {
    ///     label: Label,
    ///     value: Number,
    /// }
    ///
    /// #[derive(Debug, serde::Deserialize, PartialEq)]
    /// #[serde(rename_all = "lowercase")]
    /// enum Label {
    ///     Celsius,
    ///     Fahrenheit,
    /// }
    ///
    /// #[derive(Debug, serde::Deserialize, PartialEq)]
    /// #[serde(untagged)]
    /// enum Number {
    ///     Integer(i64),
    ///     Float(f64),
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// label,value
    /// celsius,22.2222
    /// fahrenheit,72
    /// ";
    ///     let mut rdr = csv::Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.deserialize();
    ///
    ///     // Read the first record.
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             label: Label::Celsius,
    ///             value: Number::Float(22.2222),
    ///         });
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least two records but got none"));
    ///     }
    ///
    ///     // Read the second record.
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             label: Label::Fahrenheit,
    ///             value: Number::Integer(72),
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from(
    ///             "expected at least two records but got only one"))
    ///     }
    /// }
    /// ```
    pub fn deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D>
    where
        D: DeserializeOwned,
    {
        DeserializeRecordsIter::new(self)
    }

    /// Returns an owned iterator over deserialized records.
    ///
    /// Each item yielded by this iterator is a `Result<D, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// This is mostly useful when you want to return a CSV iterator or store
    /// it somewhere.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record. Additionally,
    /// if `has_headers` is enabled, then deserializing into a struct will
    /// automatically align the values in each row to the fields of a struct
    /// based on the header row.
    ///
    /// For more detailed deserialization rules, see the documentation on the
    /// `deserialize` method.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
    /// struct Row {
    ///     city: String,
    ///     country: String,
    ///     #[serde(rename = "popcount")]
    ///     population: u64,
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// ";
    ///     let rdr = csv::Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.into_deserialize();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             city: "Boston".to_string(),
    ///             country: "United States".to_string(),
    ///             population: 4628910,
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D>
    where
        D: DeserializeOwned,
    {
        DeserializeRecordsIntoIter::new(self)
    }

    /// Returns a borrowed iterator over all records as strings.
    ///
    /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.records();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn records(&mut self) -> StringRecordsIter<R> {
        StringRecordsIter::new(self)
    }

    /// Returns an owned iterator over all records as strings.
    ///
    /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// This is mostly useful when you want to return a CSV iterator or store
    /// it somewhere.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.into_records();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn into_records(self) -> StringRecordsIntoIter<R> {
        StringRecordsIntoIter::new(self)
    }

    /// Returns a borrowed iterator over all records as raw bytes.
    ///
    /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.byte_records();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn byte_records(&mut self) -> ByteRecordsIter<R> {
        ByteRecordsIter::new(self)
    }

    /// Returns an owned iterator over all records as raw bytes.
    ///
    /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// This is mostly useful when you want to return a CSV iterator or store
    /// it somewhere.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.into_byte_records();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> {
        ByteRecordsIntoIter::new(self)
    }

    /// Returns a reference to the first row read by this parser.
    ///
    /// If no row has been read yet, then this will force parsing of the first
    /// row.
    ///
    /// If there was a problem parsing the row or if it wasn't valid UTF-8,
    /// then this returns an error.
    ///
    /// If the underlying reader emits EOF before any data, then this returns
    /// an empty record.
    ///
    /// Note that this method may be used regardless of whether `has_headers`
    /// was enabled (but it is enabled by default).
    ///
    /// # Example
    ///
    /// This example shows how to get the header row of CSV data. Notice that
    /// the header row does not appear as a record in the iterator!
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///
    ///     // We can read the headers before iterating.
    ///     {
    ///         // `headers` borrows from the reader, so we put this in its
    ///         // own scope. That way, the borrow ends before we try iterating
    ///         // below. Alternatively, we could clone the headers.
    ///         let headers = rdr.headers()?;
    ///         assert_eq!(headers, vec!["city", "country", "pop"]);
    ///     }
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least one record but got none"))
    ///     }
    ///
    ///     // We can also read the headers after iterating.
    ///     let headers = rdr.headers()?;
    ///     assert_eq!(headers, vec!["city", "country", "pop"]);
    ///     Ok(())
    /// }
    /// ```
    pub fn headers(&mut self) -> Result<&StringRecord> {
        if self.state.headers.is_none() {
            let mut record = ByteRecord::new();
            self.read_byte_record_impl(&mut record)?;
            self.set_headers_impl(Err(record));
        }
        let headers = self.state.headers.as_ref().unwrap();
        match headers.string_record {
            Ok(ref record) => Ok(record),
            Err(ref err) => Err(Error::new(ErrorKind::Utf8 {
                pos: headers.byte_record.position().map(Clone::clone),
                err: err.clone(),
            })),
        }
    }

    /// Returns a reference to the first row read by this parser as raw bytes.
    ///
    /// If no row has been read yet, then this will force parsing of the first
    /// row.
    ///
    /// If there was a problem parsing the row, then this returns an error.
    ///
    /// If the underlying reader emits EOF before any data, then this returns
    /// an empty record.
    ///
    /// Note that this method may be used regardless of whether `has_headers`
    /// was enabled (but it is enabled by default).
    ///
    /// # Example
    ///
    /// This example shows how to get the header row of CSV data. Notice that
    /// the header row does not appear as a record in the iterator!
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///
    ///     // We can read the headers before iterating.
    ///     {
    ///         // `headers` borrows from the reader, so we put this in its
    ///         // own scope. That way, the borrow ends before we try iterating
    ///         // below. Alternatively, we could clone the headers.
    ///         let headers = rdr.byte_headers()?;
    ///         assert_eq!(headers, vec!["city", "country", "pop"]);
    ///     }
    ///
    ///     if let Some(result) = rdr.byte_records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least one record but got none"))
    ///     }
    ///
    ///     // We can also read the headers after iterating.
    ///     let headers = rdr.byte_headers()?;
    ///     assert_eq!(headers, vec!["city", "country", "pop"]);
    ///     Ok(())
    /// }
    /// ```
    pub fn byte_headers(&mut self) -> Result<&ByteRecord> {
        if self.state.headers.is_none() {
            let mut record = ByteRecord::new();
            self.read_byte_record_impl(&mut record)?;
            self.set_headers_impl(Err(record));
        }
        Ok(&self.state.headers.as_ref().unwrap().byte_record)
    }

    /// Set the headers of this CSV parser manually.
    ///
    /// This overrides any other setting (including `set_byte_headers`). Any
    /// automatic detection of headers is disabled. This may be called at any
    /// time.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{Reader, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///
    ///     assert_eq!(rdr.headers()?, vec!["city", "country", "pop"]);
    ///     rdr.set_headers(StringRecord::from(vec!["a", "b", "c"]));
    ///     assert_eq!(rdr.headers()?, vec!["a", "b", "c"]);
    ///
    ///     Ok(())
    /// }
    /// ```
    pub fn set_headers(&mut self, headers: StringRecord) {
        self.set_headers_impl(Ok(headers));
    }

    /// Set the headers of this CSV parser manually as raw bytes.
    ///
    /// This overrides any other setting (including `set_headers`). Any
    /// automatic detection of headers is disabled. This may be called at any
    /// time.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{Reader, ByteRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///
    ///     assert_eq!(rdr.byte_headers()?, vec!["city", "country", "pop"]);
    ///     rdr.set_byte_headers(ByteRecord::from(vec!["a", "b", "c"]));
    ///     assert_eq!(rdr.byte_headers()?, vec!["a", "b", "c"]);
    ///
    ///     Ok(())
    /// }
    /// ```
    pub fn set_byte_headers(&mut self, headers: ByteRecord) {
        self.set_headers_impl(Err(headers));
    }

    fn set_headers_impl(
        &mut self,
        headers: result::Result<StringRecord, ByteRecord>,
    ) {
        // If we have string headers, then get byte headers. But if we have
        // byte headers, then get the string headers (or a UTF-8 error).
        let (mut str_headers, mut byte_headers) = match headers {
            Ok(string) => {
                let bytes = string.clone().into_byte_record();
                (Ok(string), bytes)
            }
            Err(bytes) => {
                match StringRecord::from_byte_record(bytes.clone()) {
                    Ok(str_headers) => (Ok(str_headers), bytes),
                    Err(err) => (Err(err.utf8_error().clone()), bytes),
                }
            }
        };
        if self.state.trim.should_trim_headers() {
            if let Ok(ref mut str_headers) = str_headers.as_mut() {
                str_headers.trim();
            }
            byte_headers.trim();
        }
        self.state.headers = Some(Headers {
            byte_record: byte_headers,
            string_record: str_headers,
        });
    }

    /// Read a single row into the given record. Returns false when no more
    /// records could be read.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this will never read the first record.
    ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
    /// caller to reuse the `StringRecord` allocation, which usually results
    /// in higher throughput.
    ///
    /// Records read via this method are guaranteed to have a position set
    /// on them, even if the reader is at EOF or if an error is returned.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{Reader, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut record = StringRecord::new();
    ///
    ///     if rdr.read_record(&mut record)? {
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn read_record(&mut self, record: &mut StringRecord) -> Result<bool> {
        let result = record.read(self);
        // We need to trim again because trimming string records includes
        // Unicode whitespace. (ByteRecord trimming only includes ASCII
        // whitespace.)
        if self.state.trim.should_trim_fields() {
            record.trim();
        }
        result
    }

    /// Read a single row into the given byte record. Returns false when no
    /// more records could be read.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this will never read the first record.
    ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
    /// caller to reuse the `ByteRecord` allocation, which usually results
    /// in higher throughput.
    ///
    /// Records read via this method are guaranteed to have a position set
    /// on them, even if the reader is at EOF or if an error is returned.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ByteRecord, Reader};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut record = ByteRecord::new();
    ///
    ///     if rdr.read_byte_record(&mut record)? {
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn read_byte_record(
        &mut self,
        record: &mut ByteRecord,
    ) -> Result<bool> {
        if !self.state.seeked && !self.state.has_headers && !self.state.first {
            // If the caller indicated "no headers" and we haven't yielded the
            // first record yet, then we should yield our header row if we have
            // one.
            if let Some(ref headers) = self.state.headers {
                self.state.first = true;
                record.clone_from(&headers.byte_record);
                if self.state.trim.should_trim_fields() {
                    record.trim();
                }
                return Ok(!record.is_empty());
            }
        }
        let ok = self.read_byte_record_impl(record)?;
        self.state.first = true;
        if !self.state.seeked && self.state.headers.is_none() {
            self.set_headers_impl(Err(record.clone()));
            // If the end user indicated that we have headers, then we should
            // never return the first row. Instead, we should attempt to
            // read and return the next one.
            if self.state.has_headers {
                let result = self.read_byte_record_impl(record);
                if self.state.trim.should_trim_fields() {
                    record.trim();
                }
                return result;
            }
        }
        if self.state.trim.should_trim_fields() {
            record.trim();
        }
        Ok(ok)
    }

    /// Read a byte record from the underlying CSV reader, without accounting
    /// for headers.
    #[inline(always)]
    fn read_byte_record_impl(
        &mut self,
        record: &mut ByteRecord,
    ) -> Result<bool> {
        use csv_core::ReadRecordResult::*;

        record.clear();
        record.set_position(Some(self.state.cur_pos.clone()));
        if self.state.eof != ReaderEofState::NotEof {
            return Ok(false);
        }
        let (mut outlen, mut endlen) = (0, 0);
        loop {
            let (res, nin, nout, nend) = {
                let input_res = self.rdr.fill_buf();
                if input_res.is_err() {
                    self.state.eof = ReaderEofState::IOError;
                }
                let input = input_res?;
                let (fields, ends) = record.as_parts();
                self.core.read_record(
                    input,
                    &mut fields[outlen..],
                    &mut ends[endlen..],
                )
            };
            self.rdr.consume(nin);
            let byte = self.state.cur_pos.byte();
            self.state
                .cur_pos
                .set_byte(byte + nin as u64)
                .set_line(self.core.line());
            outlen += nout;
            endlen += nend;
            match res {
                // The input buffer was exhausted without finishing a record,
                // so loop around and refill it via `fill_buf`.
                InputEmpty => continue,
                // The record's field buffer is full; grow it and try again.
                OutputFull => {
                    record.expand_fields();
                    continue;
                }
                // The record's field-ends buffer is full; grow it and retry.
                OutputEndsFull => {
                    record.expand_ends();
                    continue;
                }
                // A complete record was parsed.
                Record => {
                    record.set_len(endlen);
                    self.state.add_record(record)?;
                    return Ok(true);
                }
                // The underlying reader reported EOF.
                End => {
                    self.state.eof = ReaderEofState::Eof;
                    return Ok(false);
                }
            }
        }
    }

    /// Return the current position of this CSV reader.
    ///
    /// The byte offset in the position returned can be used to `seek` this
    /// reader. In particular, seeking to a position returned here on the same
    /// data will result in parsing the same subsequent record.
    ///
    /// # Example: reading the position
    ///
    /// ```
    /// use std::{error::Error, io};
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let rdr = Reader::from_reader(io::Cursor::new(data));
    ///     let mut iter = rdr.into_records();
    ///     let mut pos = Position::new();
    ///     loop {
    ///         // Read the position immediately before each record.
    ///         let next_pos = iter.reader().position().clone();
    ///         if iter.next().is_none() {
    ///             break;
    ///         }
    ///         pos = next_pos;
    ///     }
    ///
    ///     // `pos` should now be the position immediately before the last
    ///     // record.
    ///     assert_eq!(pos.byte(), 51);
    ///     assert_eq!(pos.line(), 3);
    ///     assert_eq!(pos.record(), 2);
    ///     Ok(())
    /// }
    /// ```
    pub fn position(&self) -> &Position {
        &self.state.cur_pos
    }

    /// Returns true if and only if this reader has been exhausted.
    ///
    /// When this returns true, no more records can be read from this reader
    /// (unless it has been seeked to another position).
1723 /// 1724 /// # Example 1725 /// 1726 /// ``` 1727 /// use std::{error::Error, io}; 1728 /// use csv::{Reader, Position}; 1729 /// 1730 /// # fn main() { example().unwrap(); } 1731 /// fn example() -> Result<(), Box<dyn Error>> { 1732 /// let data = "\ 1733 /// city,country,popcount 1734 /// Boston,United States,4628910 1735 /// Concord,United States,42695 1736 /// "; 1737 /// let mut rdr = Reader::from_reader(io::Cursor::new(data)); 1738 /// assert!(!rdr.is_done()); 1739 /// for result in rdr.records() { 1740 /// let _ = result?; 1741 /// } 1742 /// assert!(rdr.is_done()); 1743 /// Ok(()) 1744 /// } 1745 /// ``` is_done(&self) -> bool1746 pub fn is_done(&self) -> bool { 1747 self.state.eof != ReaderEofState::NotEof 1748 } 1749 1750 /// Returns true if and only if this reader has been configured to 1751 /// interpret the first record as a header record. has_headers(&self) -> bool1752 pub fn has_headers(&self) -> bool { 1753 self.state.has_headers 1754 } 1755 1756 /// Returns a reference to the underlying reader. get_ref(&self) -> &R1757 pub fn get_ref(&self) -> &R { 1758 self.rdr.get_ref() 1759 } 1760 1761 /// Returns a mutable reference to the underlying reader. get_mut(&mut self) -> &mut R1762 pub fn get_mut(&mut self) -> &mut R { 1763 self.rdr.get_mut() 1764 } 1765 1766 /// Unwraps this CSV reader, returning the underlying reader. 1767 /// 1768 /// Note that any leftover data inside this reader's internal buffer is 1769 /// lost. into_inner(self) -> R1770 pub fn into_inner(self) -> R { 1771 self.rdr.into_inner() 1772 } 1773 } 1774 1775 impl<R: io::Read + io::Seek> Reader<R> { 1776 /// Seeks the underlying reader to the position given. 1777 /// 1778 /// This comes with a few caveats: 1779 /// 1780 /// * Any internal buffer associated with this reader is cleared. 1781 /// * If the given position does not correspond to a position immediately 1782 /// before the start of a record, then the behavior of this reader is 1783 /// unspecified. 1784 /// * Any special logic that skips the first record in the CSV reader 1785 /// when reading or iterating over records is disabled. 1786 /// 1787 /// If the given position has a byte offset equivalent to the current 1788 /// position, then no seeking is performed. 1789 /// 1790 /// If the header row has not already been read, then this will attempt 1791 /// to read the header row before seeking. Therefore, it is possible that 1792 /// this returns an error associated with reading CSV data. 1793 /// 1794 /// Note that seeking is performed based only on the byte offset in the 1795 /// given position. Namely, the record or line numbers in the position may 1796 /// be incorrect, but this will cause any future position generated by 1797 /// this CSV reader to be similarly incorrect. 1798 /// 1799 /// # Example: seek to parse a record twice 1800 /// 1801 /// ``` 1802 /// use std::{error::Error, io}; 1803 /// use csv::{Reader, Position}; 1804 /// 1805 /// # fn main() { example().unwrap(); } 1806 /// fn example() -> Result<(), Box<dyn Error>> { 1807 /// let data = "\ 1808 /// city,country,popcount 1809 /// Boston,United States,4628910 1810 /// Concord,United States,42695 1811 /// "; 1812 /// let rdr = Reader::from_reader(io::Cursor::new(data)); 1813 /// let mut iter = rdr.into_records(); 1814 /// let mut pos = Position::new(); 1815 /// loop { 1816 /// // Read the position immediately before each record. 
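    ///         // (`position()` points at the record that will be parsed
    ///         // next, which is why it is captured before calling `next()`.)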
1817 /// let next_pos = iter.reader().position().clone(); 1818 /// if iter.next().is_none() { 1819 /// break; 1820 /// } 1821 /// pos = next_pos; 1822 /// } 1823 /// 1824 /// // Now seek the reader back to `pos`. This will let us read the 1825 /// // last record again. 1826 /// iter.reader_mut().seek(pos)?; 1827 /// let mut iter = iter.into_reader().into_records(); 1828 /// if let Some(result) = iter.next() { 1829 /// let record = result?; 1830 /// assert_eq!(record, vec!["Concord", "United States", "42695"]); 1831 /// Ok(()) 1832 /// } else { 1833 /// Err(From::from("expected at least one record but got none")) 1834 /// } 1835 /// } 1836 /// ``` seek(&mut self, pos: Position) -> Result<()>1837 pub fn seek(&mut self, pos: Position) -> Result<()> { 1838 self.byte_headers()?; 1839 self.state.seeked = true; 1840 if pos.byte() == self.state.cur_pos.byte() { 1841 return Ok(()); 1842 } 1843 self.rdr.seek(io::SeekFrom::Start(pos.byte()))?; 1844 self.core.reset(); 1845 self.core.set_line(pos.line()); 1846 self.state.cur_pos = pos; 1847 self.state.eof = ReaderEofState::NotEof; 1848 Ok(()) 1849 } 1850 1851 /// This is like `seek`, but provides direct control over how the seeking 1852 /// operation is performed via `io::SeekFrom`. 1853 /// 1854 /// The `pos` position given *should* correspond the position indicated 1855 /// by `seek_from`, but there is no requirement. If the `pos` position 1856 /// given is incorrect, then the position information returned by this 1857 /// reader will be similarly incorrect. 1858 /// 1859 /// If the header row has not already been read, then this will attempt 1860 /// to read the header row before seeking. Therefore, it is possible that 1861 /// this returns an error associated with reading CSV data. 1862 /// 1863 /// Unlike `seek`, this will always cause an actual seek to be performed. seek_raw( &mut self, seek_from: io::SeekFrom, pos: Position, ) -> Result<()>1864 pub fn seek_raw( 1865 &mut self, 1866 seek_from: io::SeekFrom, 1867 pos: Position, 1868 ) -> Result<()> { 1869 self.byte_headers()?; 1870 self.state.seeked = true; 1871 self.rdr.seek(seek_from)?; 1872 self.core.reset(); 1873 self.core.set_line(pos.line()); 1874 self.state.cur_pos = pos; 1875 self.state.eof = ReaderEofState::NotEof; 1876 Ok(()) 1877 } 1878 } 1879 1880 impl ReaderState { 1881 #[inline(always)] add_record(&mut self, record: &ByteRecord) -> Result<()>1882 fn add_record(&mut self, record: &ByteRecord) -> Result<()> { 1883 let i = self.cur_pos.record(); 1884 self.cur_pos.set_record(i.checked_add(1).unwrap()); 1885 if !self.flexible { 1886 match self.first_field_count { 1887 None => self.first_field_count = Some(record.len() as u64), 1888 Some(expected) => { 1889 if record.len() as u64 != expected { 1890 return Err(Error::new(ErrorKind::UnequalLengths { 1891 pos: record.position().map(Clone::clone), 1892 expected_len: expected, 1893 len: record.len() as u64, 1894 })); 1895 } 1896 } 1897 } 1898 } 1899 Ok(()) 1900 } 1901 } 1902 1903 /// An owned iterator over deserialized records. 1904 /// 1905 /// The type parameter `R` refers to the underlying `io::Read` type, and `D` 1906 /// refers to the type that this iterator will deserialize a record into. 
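///
/// # Example
///
/// An illustrative sketch of obtaining this iterator via
/// `Reader::into_deserialize`, shown here deserializing each record into a
/// simple tuple:
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city,pop
/// Boston,4628910
/// ";
///     let mut iter = Reader::from_reader(data.as_bytes())
///         .into_deserialize::<(String, u64)>();
///     if let Some(result) = iter.next() {
///         let (city, pop) = result?;
///         assert_eq!((city, pop), ("Boston".to_string(), 4628910));
///         Ok(())
///     } else {
///         Err(From::from("expected at least one record but got none"))
///     }
/// }
/// ```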
1907 pub struct DeserializeRecordsIntoIter<R, D> { 1908 rdr: Reader<R>, 1909 rec: StringRecord, 1910 headers: Option<StringRecord>, 1911 _priv: PhantomData<D>, 1912 } 1913 1914 impl<R: io::Read, D: DeserializeOwned> DeserializeRecordsIntoIter<R, D> { new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D>1915 fn new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D> { 1916 let headers = if !rdr.state.has_headers { 1917 None 1918 } else { 1919 rdr.headers().ok().map(Clone::clone) 1920 }; 1921 DeserializeRecordsIntoIter { 1922 rdr, 1923 rec: StringRecord::new(), 1924 headers, 1925 _priv: PhantomData, 1926 } 1927 } 1928 1929 /// Return a reference to the underlying CSV reader. reader(&self) -> &Reader<R>1930 pub fn reader(&self) -> &Reader<R> { 1931 &self.rdr 1932 } 1933 1934 /// Return a mutable reference to the underlying CSV reader. reader_mut(&mut self) -> &mut Reader<R>1935 pub fn reader_mut(&mut self) -> &mut Reader<R> { 1936 &mut self.rdr 1937 } 1938 1939 /// Drop this iterator and return the underlying CSV reader. into_reader(self) -> Reader<R>1940 pub fn into_reader(self) -> Reader<R> { 1941 self.rdr 1942 } 1943 } 1944 1945 impl<R: io::Read, D: DeserializeOwned> Iterator 1946 for DeserializeRecordsIntoIter<R, D> 1947 { 1948 type Item = Result<D>; 1949 next(&mut self) -> Option<Result<D>>1950 fn next(&mut self) -> Option<Result<D>> { 1951 match self.rdr.read_record(&mut self.rec) { 1952 Err(err) => Some(Err(err)), 1953 Ok(false) => None, 1954 Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())), 1955 } 1956 } 1957 } 1958 1959 /// A borrowed iterator over deserialized records. 1960 /// 1961 /// The lifetime parameter `'r` refers to the lifetime of the underlying 1962 /// CSV `Reader`. The type parameter `R` refers to the underlying `io::Read` 1963 /// type, and `D` refers to the type that this iterator will deserialize a 1964 /// record into. 1965 pub struct DeserializeRecordsIter<'r, R: 'r, D> { 1966 rdr: &'r mut Reader<R>, 1967 rec: StringRecord, 1968 headers: Option<StringRecord>, 1969 _priv: PhantomData<D>, 1970 } 1971 1972 impl<'r, R: io::Read, D: DeserializeOwned> DeserializeRecordsIter<'r, R, D> { new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D>1973 fn new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D> { 1974 let headers = if !rdr.state.has_headers { 1975 None 1976 } else { 1977 rdr.headers().ok().map(Clone::clone) 1978 }; 1979 DeserializeRecordsIter { 1980 rdr, 1981 rec: StringRecord::new(), 1982 headers, 1983 _priv: PhantomData, 1984 } 1985 } 1986 1987 /// Return a reference to the underlying CSV reader. reader(&self) -> &Reader<R>1988 pub fn reader(&self) -> &Reader<R> { 1989 &self.rdr 1990 } 1991 1992 /// Return a mutable reference to the underlying CSV reader. reader_mut(&mut self) -> &mut Reader<R>1993 pub fn reader_mut(&mut self) -> &mut Reader<R> { 1994 &mut self.rdr 1995 } 1996 } 1997 1998 impl<'r, R: io::Read, D: DeserializeOwned> Iterator 1999 for DeserializeRecordsIter<'r, R, D> 2000 { 2001 type Item = Result<D>; 2002 next(&mut self) -> Option<Result<D>>2003 fn next(&mut self) -> Option<Result<D>> { 2004 match self.rdr.read_record(&mut self.rec) { 2005 Err(err) => Some(Err(err)), 2006 Ok(false) => None, 2007 Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())), 2008 } 2009 } 2010 } 2011 2012 /// An owned iterator over records as strings. 
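///
/// # Example
///
/// An illustrative sketch of obtaining this iterator via
/// `Reader::into_records`:
///
/// ```
/// use csv::Reader;
///
/// let data = "city,pop\nBoston,4628910\n";
/// let mut iter = Reader::from_reader(data.as_bytes()).into_records();
/// // The header row is skipped by default, so the first item is the first
/// // data record.
/// let record = iter.next().unwrap().unwrap();
/// assert_eq!(record, vec!["Boston", "4628910"]);
/// assert!(iter.next().is_none());
/// ```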
2013 pub struct StringRecordsIntoIter<R> { 2014 rdr: Reader<R>, 2015 rec: StringRecord, 2016 } 2017 2018 impl<R: io::Read> StringRecordsIntoIter<R> { new(rdr: Reader<R>) -> StringRecordsIntoIter<R>2019 fn new(rdr: Reader<R>) -> StringRecordsIntoIter<R> { 2020 StringRecordsIntoIter { rdr, rec: StringRecord::new() } 2021 } 2022 2023 /// Return a reference to the underlying CSV reader. reader(&self) -> &Reader<R>2024 pub fn reader(&self) -> &Reader<R> { 2025 &self.rdr 2026 } 2027 2028 /// Return a mutable reference to the underlying CSV reader. reader_mut(&mut self) -> &mut Reader<R>2029 pub fn reader_mut(&mut self) -> &mut Reader<R> { 2030 &mut self.rdr 2031 } 2032 2033 /// Drop this iterator and return the underlying CSV reader. into_reader(self) -> Reader<R>2034 pub fn into_reader(self) -> Reader<R> { 2035 self.rdr 2036 } 2037 } 2038 2039 impl<R: io::Read> Iterator for StringRecordsIntoIter<R> { 2040 type Item = Result<StringRecord>; 2041 next(&mut self) -> Option<Result<StringRecord>>2042 fn next(&mut self) -> Option<Result<StringRecord>> { 2043 match self.rdr.read_record(&mut self.rec) { 2044 Err(err) => Some(Err(err)), 2045 Ok(true) => Some(Ok(self.rec.clone_truncated())), 2046 Ok(false) => None, 2047 } 2048 } 2049 } 2050 2051 /// A borrowed iterator over records as strings. 2052 /// 2053 /// The lifetime parameter `'r` refers to the lifetime of the underlying 2054 /// CSV `Reader`. 2055 pub struct StringRecordsIter<'r, R: 'r> { 2056 rdr: &'r mut Reader<R>, 2057 rec: StringRecord, 2058 } 2059 2060 impl<'r, R: io::Read> StringRecordsIter<'r, R> { new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R>2061 fn new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R> { 2062 StringRecordsIter { rdr, rec: StringRecord::new() } 2063 } 2064 2065 /// Return a reference to the underlying CSV reader. reader(&self) -> &Reader<R>2066 pub fn reader(&self) -> &Reader<R> { 2067 &self.rdr 2068 } 2069 2070 /// Return a mutable reference to the underlying CSV reader. reader_mut(&mut self) -> &mut Reader<R>2071 pub fn reader_mut(&mut self) -> &mut Reader<R> { 2072 &mut self.rdr 2073 } 2074 } 2075 2076 impl<'r, R: io::Read> Iterator for StringRecordsIter<'r, R> { 2077 type Item = Result<StringRecord>; 2078 next(&mut self) -> Option<Result<StringRecord>>2079 fn next(&mut self) -> Option<Result<StringRecord>> { 2080 match self.rdr.read_record(&mut self.rec) { 2081 Err(err) => Some(Err(err)), 2082 Ok(true) => Some(Ok(self.rec.clone_truncated())), 2083 Ok(false) => None, 2084 } 2085 } 2086 } 2087 2088 /// An owned iterator over records as raw bytes. 2089 pub struct ByteRecordsIntoIter<R> { 2090 rdr: Reader<R>, 2091 rec: ByteRecord, 2092 } 2093 2094 impl<R: io::Read> ByteRecordsIntoIter<R> { new(rdr: Reader<R>) -> ByteRecordsIntoIter<R>2095 fn new(rdr: Reader<R>) -> ByteRecordsIntoIter<R> { 2096 ByteRecordsIntoIter { rdr, rec: ByteRecord::new() } 2097 } 2098 2099 /// Return a reference to the underlying CSV reader. reader(&self) -> &Reader<R>2100 pub fn reader(&self) -> &Reader<R> { 2101 &self.rdr 2102 } 2103 2104 /// Return a mutable reference to the underlying CSV reader. reader_mut(&mut self) -> &mut Reader<R>2105 pub fn reader_mut(&mut self) -> &mut Reader<R> { 2106 &mut self.rdr 2107 } 2108 2109 /// Drop this iterator and return the underlying CSV reader. 
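    ///
    /// # Example
    ///
    /// An illustrative sketch of recovering the reader after partially
    /// iterating, so that reading can continue where the iterator stopped:
    ///
    /// ```
    /// use csv::{ByteRecord, Reader};
    ///
    /// let data = "city,pop\nBoston,4628910\nConcord,42695\n";
    /// let mut iter = Reader::from_reader(data.as_bytes()).into_byte_records();
    /// // Consume the first data record through the iterator...
    /// assert!(iter.next().is_some());
    /// // ...then take the reader back and keep reading where it left off.
    /// let mut rdr = iter.into_reader();
    /// let mut record = ByteRecord::new();
    /// assert!(rdr.read_byte_record(&mut record).unwrap());
    /// assert_eq!(record, vec!["Concord", "42695"]);
    /// ```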
into_reader(self) -> Reader<R>2110 pub fn into_reader(self) -> Reader<R> { 2111 self.rdr 2112 } 2113 } 2114 2115 impl<R: io::Read> Iterator for ByteRecordsIntoIter<R> { 2116 type Item = Result<ByteRecord>; 2117 next(&mut self) -> Option<Result<ByteRecord>>2118 fn next(&mut self) -> Option<Result<ByteRecord>> { 2119 match self.rdr.read_byte_record(&mut self.rec) { 2120 Err(err) => Some(Err(err)), 2121 Ok(true) => Some(Ok(self.rec.clone_truncated())), 2122 Ok(false) => None, 2123 } 2124 } 2125 } 2126 2127 /// A borrowed iterator over records as raw bytes. 2128 /// 2129 /// The lifetime parameter `'r` refers to the lifetime of the underlying 2130 /// CSV `Reader`. 2131 pub struct ByteRecordsIter<'r, R: 'r> { 2132 rdr: &'r mut Reader<R>, 2133 rec: ByteRecord, 2134 } 2135 2136 impl<'r, R: io::Read> ByteRecordsIter<'r, R> { new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R>2137 fn new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R> { 2138 ByteRecordsIter { rdr, rec: ByteRecord::new() } 2139 } 2140 2141 /// Return a reference to the underlying CSV reader. reader(&self) -> &Reader<R>2142 pub fn reader(&self) -> &Reader<R> { 2143 &self.rdr 2144 } 2145 2146 /// Return a mutable reference to the underlying CSV reader. reader_mut(&mut self) -> &mut Reader<R>2147 pub fn reader_mut(&mut self) -> &mut Reader<R> { 2148 &mut self.rdr 2149 } 2150 } 2151 2152 impl<'r, R: io::Read> Iterator for ByteRecordsIter<'r, R> { 2153 type Item = Result<ByteRecord>; 2154 next(&mut self) -> Option<Result<ByteRecord>>2155 fn next(&mut self) -> Option<Result<ByteRecord>> { 2156 match self.rdr.read_byte_record(&mut self.rec) { 2157 Err(err) => Some(Err(err)), 2158 Ok(true) => Some(Ok(self.rec.clone_truncated())), 2159 Ok(false) => None, 2160 } 2161 } 2162 } 2163 2164 #[cfg(test)] 2165 mod tests { 2166 use std::io; 2167 2168 use crate::{ 2169 byte_record::ByteRecord, error::ErrorKind, string_record::StringRecord, 2170 }; 2171 2172 use super::{Position, ReaderBuilder, Trim}; 2173 b(s: &str) -> &[u8]2174 fn b(s: &str) -> &[u8] { 2175 s.as_bytes() 2176 } s(b: &[u8]) -> &str2177 fn s(b: &[u8]) -> &str { 2178 ::std::str::from_utf8(b).unwrap() 2179 } 2180 newpos(byte: u64, line: u64, record: u64) -> Position2181 fn newpos(byte: u64, line: u64, record: u64) -> Position { 2182 let mut p = Position::new(); 2183 p.set_byte(byte).set_line(line).set_record(record); 2184 p 2185 } 2186 2187 #[test] read_byte_record()2188 fn read_byte_record() { 2189 let data = b("foo,\"b,ar\",baz\nabc,mno,xyz"); 2190 let mut rdr = 2191 ReaderBuilder::new().has_headers(false).from_reader(data); 2192 let mut rec = ByteRecord::new(); 2193 2194 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2195 assert_eq!(3, rec.len()); 2196 assert_eq!("foo", s(&rec[0])); 2197 assert_eq!("b,ar", s(&rec[1])); 2198 assert_eq!("baz", s(&rec[2])); 2199 2200 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2201 assert_eq!(3, rec.len()); 2202 assert_eq!("abc", s(&rec[0])); 2203 assert_eq!("mno", s(&rec[1])); 2204 assert_eq!("xyz", s(&rec[2])); 2205 2206 assert!(!rdr.read_byte_record(&mut rec).unwrap()); 2207 } 2208 2209 #[test] read_trimmed_records_and_headers()2210 fn read_trimmed_records_and_headers() { 2211 let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t"); 2212 let mut rdr = ReaderBuilder::new() 2213 .has_headers(true) 2214 .trim(Trim::All) 2215 .from_reader(data); 2216 let mut rec = ByteRecord::new(); 2217 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2218 assert_eq!("1", s(&rec[0])); 2219 assert_eq!("2", s(&rec[1])); 2220 assert_eq!("3", s(&rec[2])); 2221 
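        // Reading the next row through a StringRecord exercises the UTF-8
        // path; with `Trim::All` its fields come back trimmed as well.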
let mut rec = StringRecord::new(); 2222 assert!(rdr.read_record(&mut rec).unwrap()); 2223 assert_eq!("1", &rec[0]); 2224 assert_eq!("", &rec[1]); 2225 assert_eq!("3", &rec[2]); 2226 { 2227 let headers = rdr.headers().unwrap(); 2228 assert_eq!(3, headers.len()); 2229 assert_eq!("foo", &headers[0]); 2230 assert_eq!("bar", &headers[1]); 2231 assert_eq!("baz", &headers[2]); 2232 } 2233 } 2234 2235 #[test] read_trimmed_header()2236 fn read_trimmed_header() { 2237 let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t"); 2238 let mut rdr = ReaderBuilder::new() 2239 .has_headers(true) 2240 .trim(Trim::Headers) 2241 .from_reader(data); 2242 let mut rec = ByteRecord::new(); 2243 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2244 assert_eq!(" 1", s(&rec[0])); 2245 assert_eq!(" 2", s(&rec[1])); 2246 assert_eq!(" 3", s(&rec[2])); 2247 { 2248 let headers = rdr.headers().unwrap(); 2249 assert_eq!(3, headers.len()); 2250 assert_eq!("foo", &headers[0]); 2251 assert_eq!("bar", &headers[1]); 2252 assert_eq!("baz", &headers[2]); 2253 } 2254 } 2255 2256 #[test] read_trimed_header_invalid_utf8()2257 fn read_trimed_header_invalid_utf8() { 2258 let data = &b"foo, b\xFFar,\tbaz\na,b,c\nd,e,f"[..]; 2259 let mut rdr = ReaderBuilder::new() 2260 .has_headers(true) 2261 .trim(Trim::Headers) 2262 .from_reader(data); 2263 let mut rec = StringRecord::new(); 2264 2265 // force the headers to be read 2266 let _ = rdr.read_record(&mut rec); 2267 // Check the byte headers are trimmed 2268 { 2269 let headers = rdr.byte_headers().unwrap(); 2270 assert_eq!(3, headers.len()); 2271 assert_eq!(b"foo", &headers[0]); 2272 assert_eq!(b"b\xFFar", &headers[1]); 2273 assert_eq!(b"baz", &headers[2]); 2274 } 2275 match *rdr.headers().unwrap_err().kind() { 2276 ErrorKind::Utf8 { pos: Some(ref pos), ref err } => { 2277 assert_eq!(pos, &newpos(0, 1, 0)); 2278 assert_eq!(err.field(), 1); 2279 assert_eq!(err.valid_up_to(), 3); 2280 } 2281 ref err => panic!("match failed, got {:?}", err), 2282 } 2283 } 2284 2285 #[test] read_trimmed_records()2286 fn read_trimmed_records() { 2287 let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t"); 2288 let mut rdr = ReaderBuilder::new() 2289 .has_headers(true) 2290 .trim(Trim::Fields) 2291 .from_reader(data); 2292 let mut rec = ByteRecord::new(); 2293 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2294 assert_eq!("1", s(&rec[0])); 2295 assert_eq!("2", s(&rec[1])); 2296 assert_eq!("3", s(&rec[2])); 2297 { 2298 let headers = rdr.headers().unwrap(); 2299 assert_eq!(3, headers.len()); 2300 assert_eq!("foo", &headers[0]); 2301 assert_eq!(" bar", &headers[1]); 2302 assert_eq!("\tbaz", &headers[2]); 2303 } 2304 } 2305 2306 #[test] read_trimmed_records_without_headers()2307 fn read_trimmed_records_without_headers() { 2308 let data = b("a1, b1\t,\t c1\t\n"); 2309 let mut rdr = ReaderBuilder::new() 2310 .has_headers(false) 2311 .trim(Trim::All) 2312 .from_reader(data); 2313 let mut rec = ByteRecord::new(); 2314 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2315 assert_eq!("a1", s(&rec[0])); 2316 assert_eq!("b1", s(&rec[1])); 2317 assert_eq!("c1", s(&rec[2])); 2318 } 2319 2320 #[test] read_record_unequal_fails()2321 fn read_record_unequal_fails() { 2322 let data = b("foo\nbar,baz"); 2323 let mut rdr = 2324 ReaderBuilder::new().has_headers(false).from_reader(data); 2325 let mut rec = ByteRecord::new(); 2326 2327 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2328 assert_eq!(1, rec.len()); 2329 assert_eq!("foo", s(&rec[0])); 2330 2331 match rdr.read_byte_record(&mut rec) { 2332 Err(err) => match *err.kind() { 
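                // With the default (non-flexible) reader, the first row's
                // field count (1) becomes the expected length, so the
                // two-field second row is reported as an error.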
2333 ErrorKind::UnequalLengths { 2334 expected_len: 1, 2335 ref pos, 2336 len: 2, 2337 } => { 2338 assert_eq!(pos, &Some(newpos(4, 2, 1))); 2339 } 2340 ref wrong => panic!("match failed, got {:?}", wrong), 2341 }, 2342 wrong => panic!("match failed, got {:?}", wrong), 2343 } 2344 } 2345 2346 #[test] read_record_unequal_ok()2347 fn read_record_unequal_ok() { 2348 let data = b("foo\nbar,baz"); 2349 let mut rdr = ReaderBuilder::new() 2350 .has_headers(false) 2351 .flexible(true) 2352 .from_reader(data); 2353 let mut rec = ByteRecord::new(); 2354 2355 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2356 assert_eq!(1, rec.len()); 2357 assert_eq!("foo", s(&rec[0])); 2358 2359 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2360 assert_eq!(2, rec.len()); 2361 assert_eq!("bar", s(&rec[0])); 2362 assert_eq!("baz", s(&rec[1])); 2363 2364 assert!(!rdr.read_byte_record(&mut rec).unwrap()); 2365 } 2366 2367 // This tests that even if we get a CSV error, we can continue reading 2368 // if we want. 2369 #[test] read_record_unequal_continue()2370 fn read_record_unequal_continue() { 2371 let data = b("foo\nbar,baz\nquux"); 2372 let mut rdr = 2373 ReaderBuilder::new().has_headers(false).from_reader(data); 2374 let mut rec = ByteRecord::new(); 2375 2376 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2377 assert_eq!(1, rec.len()); 2378 assert_eq!("foo", s(&rec[0])); 2379 2380 match rdr.read_byte_record(&mut rec) { 2381 Err(err) => match err.kind() { 2382 &ErrorKind::UnequalLengths { 2383 expected_len: 1, 2384 ref pos, 2385 len: 2, 2386 } => { 2387 assert_eq!(pos, &Some(newpos(4, 2, 1))); 2388 } 2389 wrong => panic!("match failed, got {:?}", wrong), 2390 }, 2391 wrong => panic!("match failed, got {:?}", wrong), 2392 } 2393 2394 assert!(rdr.read_byte_record(&mut rec).unwrap()); 2395 assert_eq!(1, rec.len()); 2396 assert_eq!("quux", s(&rec[0])); 2397 2398 assert!(!rdr.read_byte_record(&mut rec).unwrap()); 2399 } 2400 2401 #[test] read_record_headers()2402 fn read_record_headers() { 2403 let data = b("foo,bar,baz\na,b,c\nd,e,f"); 2404 let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data); 2405 let mut rec = StringRecord::new(); 2406 2407 assert!(rdr.read_record(&mut rec).unwrap()); 2408 assert_eq!(3, rec.len()); 2409 assert_eq!("a", &rec[0]); 2410 2411 assert!(rdr.read_record(&mut rec).unwrap()); 2412 assert_eq!(3, rec.len()); 2413 assert_eq!("d", &rec[0]); 2414 2415 assert!(!rdr.read_record(&mut rec).unwrap()); 2416 2417 { 2418 let headers = rdr.byte_headers().unwrap(); 2419 assert_eq!(3, headers.len()); 2420 assert_eq!(b"foo", &headers[0]); 2421 assert_eq!(b"bar", &headers[1]); 2422 assert_eq!(b"baz", &headers[2]); 2423 } 2424 { 2425 let headers = rdr.headers().unwrap(); 2426 assert_eq!(3, headers.len()); 2427 assert_eq!("foo", &headers[0]); 2428 assert_eq!("bar", &headers[1]); 2429 assert_eq!("baz", &headers[2]); 2430 } 2431 } 2432 2433 #[test] read_record_headers_invalid_utf8()2434 fn read_record_headers_invalid_utf8() { 2435 let data = &b"foo,b\xFFar,baz\na,b,c\nd,e,f"[..]; 2436 let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data); 2437 let mut rec = StringRecord::new(); 2438 2439 assert!(rdr.read_record(&mut rec).unwrap()); 2440 assert_eq!(3, rec.len()); 2441 assert_eq!("a", &rec[0]); 2442 2443 assert!(rdr.read_record(&mut rec).unwrap()); 2444 assert_eq!(3, rec.len()); 2445 assert_eq!("d", &rec[0]); 2446 2447 assert!(!rdr.read_record(&mut rec).unwrap()); 2448 2449 // Check that we can read the headers as raw bytes, but that 2450 // if we read them as strings, we get 
an appropriate UTF-8 error.
        {
            let headers = rdr.byte_headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!(b"foo", &headers[0]);
            assert_eq!(b"b\xFFar", &headers[1]);
            assert_eq!(b"baz", &headers[2]);
        }
        match *rdr.headers().unwrap_err().kind() {
            ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
                assert_eq!(pos, &newpos(0, 1, 0));
                assert_eq!(err.field(), 1);
                assert_eq!(err.valid_up_to(), 1);
            }
            ref err => panic!("match failed, got {:?}", err),
        }
    }

    #[test]
    fn read_record_no_headers_before() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = StringRecord::new();

        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());
    }

    #[test]
    fn read_record_no_headers_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = StringRecord::new();

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());

        let headers = rdr.headers().unwrap();
        assert_eq!(3, headers.len());
        assert_eq!("foo", &headers[0]);
        assert_eq!("bar", &headers[1]);
        assert_eq!("baz", &headers[2]);
    }

    #[test]
    fn seek() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(newpos(18, 3, 2)).unwrap();

        let mut rec = StringRecord::new();

        assert_eq!(18, rdr.position().byte());
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert_eq!(24, rdr.position().byte());
        assert_eq!(4, rdr.position().line());
        assert_eq!(3, rdr.position().record());
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("g", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());
    }

    // Test that we can read headers after seeking even if the headers weren't
    // explicitly read before seeking.
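    // This works because `seek` reads and caches the header row (via
    // `byte_headers`) before moving the underlying reader.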
    #[test]
    fn seek_headers_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(newpos(18, 3, 2)).unwrap();
        assert_eq!(rdr.headers().unwrap(), vec!["foo", "bar", "baz"]);
    }

    // Test that we can read headers after seeking if the headers were read
    // before seeking.
    #[test]
    fn seek_headers_before_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        let headers = rdr.headers().unwrap().clone();
        rdr.seek(newpos(18, 3, 2)).unwrap();
        assert_eq!(&headers, rdr.headers().unwrap());
    }

    // Test that even if we didn't read headers before seeking, if we seek to
    // the current byte offset, then no seeking is done and therefore we can
    // still read headers after seeking.
    #[test]
    fn seek_headers_no_actual_seek() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(Position::new()).unwrap();
        assert_eq!("foo", &rdr.headers().unwrap()[0]);
    }

    // Test that position info is reported correctly in absence of headers.
    #[test]
    fn positions_no_headers() {
        let mut rdr = ReaderBuilder::new()
            .has_headers(false)
            .from_reader("a,b,c\nx,y,z".as_bytes())
            .into_records();

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 0);
        assert_eq!(pos.line(), 1);
        assert_eq!(pos.record(), 0);

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 6);
        assert_eq!(pos.line(), 2);
        assert_eq!(pos.record(), 1);
    }

    // Test that position info is reported correctly with headers.
    #[test]
    fn positions_headers() {
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .from_reader("a,b,c\nx,y,z".as_bytes())
            .into_records();

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 6);
        assert_eq!(pos.line(), 2);
        assert_eq!(pos.record(), 1);
    }

    // Test that reading headers on empty data yields an empty record.
    #[test]
    fn headers_on_empty_data() {
        let mut rdr = ReaderBuilder::new().from_reader("".as_bytes());
        let r = rdr.byte_headers().unwrap();
        assert_eq!(r.len(), 0);
    }

    // Test that reading the first record on empty data works.
    #[test]
    fn no_headers_on_empty_data() {
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
        assert_eq!(rdr.records().count(), 0);
    }

    // Test that reading the first record on empty data works, even if
    // we've tried to read headers beforehand.
    #[test]
    fn no_headers_on_empty_data_after_headers() {
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
        assert_eq!(rdr.headers().unwrap().len(), 0);
        assert_eq!(rdr.records().count(), 0);
    }
}