• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::{
2     fs::File,
3     io::{self, BufRead, Seek},
4     marker::PhantomData,
5     path::Path,
6     result,
7 };
8 
9 use {
10     csv_core::{Reader as CoreReader, ReaderBuilder as CoreReaderBuilder},
11     serde::de::DeserializeOwned,
12 };
13 
14 use crate::{
15     byte_record::{ByteRecord, Position},
16     error::{Error, ErrorKind, Result, Utf8Error},
17     string_record::StringRecord,
18     {Terminator, Trim},
19 };
20 
21 /// Builds a CSV reader with various configuration knobs.
22 ///
23 /// This builder can be used to tweak the field delimiter, record terminator
24 /// and more. Once a CSV `Reader` is built, its configuration cannot be
25 /// changed.
26 #[derive(Debug)]
27 pub struct ReaderBuilder {
28     capacity: usize,
29     flexible: bool,
30     has_headers: bool,
31     trim: Trim,
32     /// The underlying CSV parser builder.
33     ///
34     /// We explicitly put this on the heap because CoreReaderBuilder embeds an
35     /// entire DFA transition table, which along with other things, tallies up
36     /// to almost 500 bytes on the stack.
37     builder: Box<CoreReaderBuilder>,
38 }
39 
40 impl Default for ReaderBuilder {
default() -> ReaderBuilder41     fn default() -> ReaderBuilder {
42         ReaderBuilder {
43             capacity: 8 * (1 << 10),
44             flexible: false,
45             has_headers: true,
46             trim: Trim::default(),
47             builder: Box::new(CoreReaderBuilder::default()),
48         }
49     }
50 }
51 
52 impl ReaderBuilder {
53     /// Create a new builder for configuring CSV parsing.
54     ///
55     /// To convert a builder into a reader, call one of the methods starting
56     /// with `from_`.
57     ///
58     /// # Example
59     ///
60     /// ```
61     /// use std::error::Error;
62     /// use csv::{ReaderBuilder, StringRecord};
63     ///
64     /// # fn main() { example().unwrap(); }
65     /// fn example() -> Result<(), Box<dyn Error>> {
66     ///     let data = "\
67     /// city,country,pop
68     /// Boston,United States,4628910
69     /// Concord,United States,42695
70     /// ";
71     ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
72     ///
73     ///     let records = rdr
74     ///         .records()
75     ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
76     ///     assert_eq!(records, vec![
77     ///         vec!["Boston", "United States", "4628910"],
78     ///         vec!["Concord", "United States", "42695"],
79     ///     ]);
80     ///     Ok(())
81     /// }
82     /// ```
new() -> ReaderBuilder83     pub fn new() -> ReaderBuilder {
84         ReaderBuilder::default()
85     }
86 
87     /// Build a CSV parser from this configuration that reads data from the
88     /// given file path.
89     ///
90     /// If there was a problem opening the file at the given path, then this
91     /// returns the corresponding error.
92     ///
93     /// # Example
94     ///
95     /// ```no_run
96     /// use std::error::Error;
97     /// use csv::ReaderBuilder;
98     ///
99     /// # fn main() { example().unwrap(); }
100     /// fn example() -> Result<(), Box<dyn Error>> {
101     ///     let mut rdr = ReaderBuilder::new().from_path("foo.csv")?;
102     ///     for result in rdr.records() {
103     ///         let record = result?;
104     ///         println!("{:?}", record);
105     ///     }
106     ///     Ok(())
107     /// }
108     /// ```
from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>>109     pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
110         Ok(Reader::new(self, File::open(path)?))
111     }
112 
113     /// Build a CSV parser from this configuration that reads data from `rdr`.
114     ///
115     /// Note that the CSV reader is buffered automatically, so you should not
116     /// wrap `rdr` in a buffered reader like `io::BufReader`.
117     ///
118     /// # Example
119     ///
120     /// ```
121     /// use std::error::Error;
122     /// use csv::ReaderBuilder;
123     ///
124     /// # fn main() { example().unwrap(); }
125     /// fn example() -> Result<(), Box<dyn Error>> {
126     ///     let data = "\
127     /// city,country,pop
128     /// Boston,United States,4628910
129     /// Concord,United States,42695
130     /// ";
131     ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
132     ///     for result in rdr.records() {
133     ///         let record = result?;
134     ///         println!("{:?}", record);
135     ///     }
136     ///     Ok(())
137     /// }
138     /// ```
from_reader<R: io::Read>(&self, rdr: R) -> Reader<R>139     pub fn from_reader<R: io::Read>(&self, rdr: R) -> Reader<R> {
140         Reader::new(self, rdr)
141     }
142 
143     /// The field delimiter to use when parsing CSV.
144     ///
145     /// The default is `b','`.
146     ///
147     /// # Example
148     ///
149     /// ```
150     /// use std::error::Error;
151     /// use csv::ReaderBuilder;
152     ///
153     /// # fn main() { example().unwrap(); }
154     /// fn example() -> Result<(), Box<dyn Error>> {
155     ///     let data = "\
156     /// city;country;pop
157     /// Boston;United States;4628910
158     /// ";
159     ///     let mut rdr = ReaderBuilder::new()
160     ///         .delimiter(b';')
161     ///         .from_reader(data.as_bytes());
162     ///
163     ///     if let Some(result) = rdr.records().next() {
164     ///         let record = result?;
165     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
166     ///         Ok(())
167     ///     } else {
168     ///         Err(From::from("expected at least one record but got none"))
169     ///     }
170     /// }
171     /// ```
delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder172     pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder {
173         self.builder.delimiter(delimiter);
174         self
175     }
176 
177     /// Whether to treat the first row as a special header row.
178     ///
179     /// By default, the first row is treated as a special header row, which
180     /// means the header is never returned by any of the record reading methods
181     /// or iterators. When this is disabled (`yes` set to `false`), the first
182     /// row is not treated specially.
183     ///
184     /// Note that the `headers` and `byte_headers` methods are unaffected by
185     /// whether this is set. Those methods always return the first record.
186     ///
187     /// # Example
188     ///
189     /// This example shows what happens when `has_headers` is disabled.
190     /// Namely, the first row is treated just like any other row.
191     ///
192     /// ```
193     /// use std::error::Error;
194     /// use csv::ReaderBuilder;
195     ///
196     /// # fn main() { example().unwrap(); }
197     /// fn example() -> Result<(), Box<dyn Error>> {
198     ///     let data = "\
199     /// city,country,pop
200     /// Boston,United States,4628910
201     /// ";
202     ///     let mut rdr = ReaderBuilder::new()
203     ///         .has_headers(false)
204     ///         .from_reader(data.as_bytes());
205     ///     let mut iter = rdr.records();
206     ///
207     ///     // Read the first record.
208     ///     if let Some(result) = iter.next() {
209     ///         let record = result?;
210     ///         assert_eq!(record, vec!["city", "country", "pop"]);
211     ///     } else {
212     ///         return Err(From::from(
213     ///             "expected at least two records but got none"));
214     ///     }
215     ///
216     ///     // Read the second record.
217     ///     if let Some(result) = iter.next() {
218     ///         let record = result?;
219     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
220     ///     } else {
221     ///         return Err(From::from(
222     ///             "expected at least two records but got one"))
223     ///     }
224     ///     Ok(())
225     /// }
226     /// ```
has_headers(&mut self, yes: bool) -> &mut ReaderBuilder227     pub fn has_headers(&mut self, yes: bool) -> &mut ReaderBuilder {
228         self.has_headers = yes;
229         self
230     }
231 
232     /// Whether the number of fields in records is allowed to change or not.
233     ///
234     /// When disabled (which is the default), parsing CSV data will return an
235     /// error if a record is found with a number of fields different from the
236     /// number of fields in a previous record.
237     ///
238     /// When enabled, this error checking is turned off.
239     ///
240     /// # Example: flexible records enabled
241     ///
242     /// ```
243     /// use std::error::Error;
244     /// use csv::ReaderBuilder;
245     ///
246     /// # fn main() { example().unwrap(); }
247     /// fn example() -> Result<(), Box<dyn Error>> {
248     ///     // Notice that the first row is missing the population count.
249     ///     let data = "\
250     /// city,country,pop
251     /// Boston,United States
252     /// ";
253     ///     let mut rdr = ReaderBuilder::new()
254     ///         .flexible(true)
255     ///         .from_reader(data.as_bytes());
256     ///
257     ///     if let Some(result) = rdr.records().next() {
258     ///         let record = result?;
259     ///         assert_eq!(record, vec!["Boston", "United States"]);
260     ///         Ok(())
261     ///     } else {
262     ///         Err(From::from("expected at least one record but got none"))
263     ///     }
264     /// }
265     /// ```
266     ///
267     /// # Example: flexible records disabled
268     ///
269     /// This shows the error that appears when records of unequal length
270     /// are found and flexible records have been disabled (which is the
271     /// default).
272     ///
273     /// ```
274     /// use std::error::Error;
275     /// use csv::{ErrorKind, ReaderBuilder};
276     ///
277     /// # fn main() { example().unwrap(); }
278     /// fn example() -> Result<(), Box<dyn Error>> {
279     ///     // Notice that the first row is missing the population count.
280     ///     let data = "\
281     /// city,country,pop
282     /// Boston,United States
283     /// ";
284     ///     let mut rdr = ReaderBuilder::new()
285     ///         .flexible(false)
286     ///         .from_reader(data.as_bytes());
287     ///
288     ///     if let Some(Err(err)) = rdr.records().next() {
289     ///         match *err.kind() {
290     ///             ErrorKind::UnequalLengths { expected_len, len, .. } => {
291     ///                 // The header row has 3 fields...
292     ///                 assert_eq!(expected_len, 3);
293     ///                 // ... but the first row has only 2 fields.
294     ///                 assert_eq!(len, 2);
295     ///                 Ok(())
296     ///             }
297     ///             ref wrong => {
298     ///                 Err(From::from(format!(
299     ///                     "expected UnequalLengths error but got {:?}",
300     ///                     wrong)))
301     ///             }
302     ///         }
303     ///     } else {
304     ///         Err(From::from(
305     ///             "expected at least one errored record but got none"))
306     ///     }
307     /// }
308     /// ```
flexible(&mut self, yes: bool) -> &mut ReaderBuilder309     pub fn flexible(&mut self, yes: bool) -> &mut ReaderBuilder {
310         self.flexible = yes;
311         self
312     }
313 
314     /// Whether fields are trimmed of leading and trailing whitespace or not.
315     ///
316     /// By default, no trimming is performed. This method permits one to
317     /// override that behavior and choose one of the following options:
318     ///
319     /// 1. `Trim::Headers` trims only header values.
320     /// 2. `Trim::Fields` trims only non-header or "field" values.
321     /// 3. `Trim::All` trims both header and non-header values.
322     ///
323     /// A value is only interpreted as a header value if this CSV reader is
324     /// configured to read a header record (which is the default).
325     ///
326     /// When reading string records, characters meeting the definition of
327     /// Unicode whitespace are trimmed. When reading byte records, characters
328     /// meeting the definition of ASCII whitespace are trimmed. ASCII
329     /// whitespace characters correspond to the set `[\t\n\v\f\r ]`.
330     ///
331     /// # Example
332     ///
333     /// This example shows what happens when all values are trimmed.
334     ///
335     /// ```
336     /// use std::error::Error;
337     /// use csv::{ReaderBuilder, StringRecord, Trim};
338     ///
339     /// # fn main() { example().unwrap(); }
340     /// fn example() -> Result<(), Box<dyn Error>> {
341     ///     let data = "\
342     /// city ,   country ,  pop
343     /// Boston,\"
344     ///    United States\",4628910
345     /// Concord,   United States   ,42695
346     /// ";
347     ///     let mut rdr = ReaderBuilder::new()
348     ///         .trim(Trim::All)
349     ///         .from_reader(data.as_bytes());
350     ///     let records = rdr
351     ///         .records()
352     ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
353     ///     assert_eq!(records, vec![
354     ///         vec!["Boston", "United States", "4628910"],
355     ///         vec!["Concord", "United States", "42695"],
356     ///     ]);
357     ///     Ok(())
358     /// }
359     /// ```
trim(&mut self, trim: Trim) -> &mut ReaderBuilder360     pub fn trim(&mut self, trim: Trim) -> &mut ReaderBuilder {
361         self.trim = trim;
362         self
363     }
364 
365     /// The record terminator to use when parsing CSV.
366     ///
367     /// A record terminator can be any single byte. The default is a special
368     /// value, `Terminator::CRLF`, which treats any occurrence of `\r`, `\n`
369     /// or `\r\n` as a single record terminator.
370     ///
371     /// # Example: `$` as a record terminator
372     ///
373     /// ```
374     /// use std::error::Error;
375     /// use csv::{ReaderBuilder, Terminator};
376     ///
377     /// # fn main() { example().unwrap(); }
378     /// fn example() -> Result<(), Box<dyn Error>> {
379     ///     let data = "city,country,pop$Boston,United States,4628910";
380     ///     let mut rdr = ReaderBuilder::new()
381     ///         .terminator(Terminator::Any(b'$'))
382     ///         .from_reader(data.as_bytes());
383     ///
384     ///     if let Some(result) = rdr.records().next() {
385     ///         let record = result?;
386     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
387     ///         Ok(())
388     ///     } else {
389     ///         Err(From::from("expected at least one record but got none"))
390     ///     }
391     /// }
392     /// ```
terminator(&mut self, term: Terminator) -> &mut ReaderBuilder393     pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder {
394         self.builder.terminator(term.to_core());
395         self
396     }
397 
398     /// The quote character to use when parsing CSV.
399     ///
400     /// The default is `b'"'`.
401     ///
402     /// # Example: single quotes instead of double quotes
403     ///
404     /// ```
405     /// use std::error::Error;
406     /// use csv::ReaderBuilder;
407     ///
408     /// # fn main() { example().unwrap(); }
409     /// fn example() -> Result<(), Box<dyn Error>> {
410     ///     let data = "\
411     /// city,country,pop
412     /// Boston,'United States',4628910
413     /// ";
414     ///     let mut rdr = ReaderBuilder::new()
415     ///         .quote(b'\'')
416     ///         .from_reader(data.as_bytes());
417     ///
418     ///     if let Some(result) = rdr.records().next() {
419     ///         let record = result?;
420     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
421     ///         Ok(())
422     ///     } else {
423     ///         Err(From::from("expected at least one record but got none"))
424     ///     }
425     /// }
426     /// ```
quote(&mut self, quote: u8) -> &mut ReaderBuilder427     pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder {
428         self.builder.quote(quote);
429         self
430     }
431 
432     /// The escape character to use when parsing CSV.
433     ///
434     /// In some variants of CSV, quotes are escaped using a special escape
435     /// character like `\` (instead of escaping quotes by doubling them).
436     ///
437     /// By default, recognizing these idiosyncratic escapes is disabled.
438     ///
439     /// # Example
440     ///
441     /// ```
442     /// use std::error::Error;
443     /// use csv::ReaderBuilder;
444     ///
445     /// # fn main() { example().unwrap(); }
446     /// fn example() -> Result<(), Box<dyn Error>> {
447     ///     let data = "\
448     /// city,country,pop
449     /// Boston,\"The \\\"United\\\" States\",4628910
450     /// ";
451     ///     let mut rdr = ReaderBuilder::new()
452     ///         .escape(Some(b'\\'))
453     ///         .from_reader(data.as_bytes());
454     ///
455     ///     if let Some(result) = rdr.records().next() {
456     ///         let record = result?;
457     ///         assert_eq!(record, vec![
458     ///             "Boston", "The \"United\" States", "4628910",
459     ///         ]);
460     ///         Ok(())
461     ///     } else {
462     ///         Err(From::from("expected at least one record but got none"))
463     ///     }
464     /// }
465     /// ```
escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder466     pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder {
467         self.builder.escape(escape);
468         self
469     }
470 
471     /// Enable double quote escapes.
472     ///
473     /// This is enabled by default, but it may be disabled. When disabled,
474     /// doubled quotes are not interpreted as escapes.
475     ///
476     /// # Example
477     ///
478     /// ```
479     /// use std::error::Error;
480     /// use csv::ReaderBuilder;
481     ///
482     /// # fn main() { example().unwrap(); }
483     /// fn example() -> Result<(), Box<dyn Error>> {
484     ///     let data = "\
485     /// city,country,pop
486     /// Boston,\"The \"\"United\"\" States\",4628910
487     /// ";
488     ///     let mut rdr = ReaderBuilder::new()
489     ///         .double_quote(false)
490     ///         .from_reader(data.as_bytes());
491     ///
492     ///     if let Some(result) = rdr.records().next() {
493     ///         let record = result?;
494     ///         assert_eq!(record, vec![
495     ///             "Boston", "The \"United\"\" States\"", "4628910",
496     ///         ]);
497     ///         Ok(())
498     ///     } else {
499     ///         Err(From::from("expected at least one record but got none"))
500     ///     }
501     /// }
502     /// ```
double_quote(&mut self, yes: bool) -> &mut ReaderBuilder503     pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder {
504         self.builder.double_quote(yes);
505         self
506     }
507 
508     /// Enable or disable quoting.
509     ///
510     /// This is enabled by default, but it may be disabled. When disabled,
511     /// quotes are not treated specially.
512     ///
513     /// # Example
514     ///
515     /// ```
516     /// use std::error::Error;
517     /// use csv::ReaderBuilder;
518     ///
519     /// # fn main() { example().unwrap(); }
520     /// fn example() -> Result<(), Box<dyn Error>> {
521     ///     let data = "\
522     /// city,country,pop
523     /// Boston,\"The United States,4628910
524     /// ";
525     ///     let mut rdr = ReaderBuilder::new()
526     ///         .quoting(false)
527     ///         .from_reader(data.as_bytes());
528     ///
529     ///     if let Some(result) = rdr.records().next() {
530     ///         let record = result?;
531     ///         assert_eq!(record, vec![
532     ///             "Boston", "\"The United States", "4628910",
533     ///         ]);
534     ///         Ok(())
535     ///     } else {
536     ///         Err(From::from("expected at least one record but got none"))
537     ///     }
538     /// }
539     /// ```
quoting(&mut self, yes: bool) -> &mut ReaderBuilder540     pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
541         self.builder.quoting(yes);
542         self
543     }
544 
545     /// The comment character to use when parsing CSV.
546     ///
547     /// If the start of a record begins with the byte given here, then that
548     /// line is ignored by the CSV parser.
549     ///
550     /// This is disabled by default.
551     ///
552     /// # Example
553     ///
554     /// ```
555     /// use std::error::Error;
556     /// use csv::ReaderBuilder;
557     ///
558     /// # fn main() { example().unwrap(); }
559     /// fn example() -> Result<(), Box<dyn Error>> {
560     ///     let data = "\
561     /// city,country,pop
562     /// #Concord,United States,42695
563     /// Boston,United States,4628910
564     /// ";
565     ///     let mut rdr = ReaderBuilder::new()
566     ///         .comment(Some(b'#'))
567     ///         .from_reader(data.as_bytes());
568     ///
569     ///     if let Some(result) = rdr.records().next() {
570     ///         let record = result?;
571     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
572     ///         Ok(())
573     ///     } else {
574     ///         Err(From::from("expected at least one record but got none"))
575     ///     }
576     /// }
577     /// ```
comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder578     pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
579         self.builder.comment(comment);
580         self
581     }
582 
583     /// A convenience method for specifying a configuration to read ASCII
584     /// delimited text.
585     ///
586     /// This sets the delimiter and record terminator to the ASCII unit
587     /// separator (`\x1F`) and record separator (`\x1E`), respectively.
588     ///
589     /// # Example
590     ///
591     /// ```
592     /// use std::error::Error;
593     /// use csv::ReaderBuilder;
594     ///
595     /// # fn main() { example().unwrap(); }
596     /// fn example() -> Result<(), Box<dyn Error>> {
597     ///     let data = "\
598     /// city\x1Fcountry\x1Fpop\x1EBoston\x1FUnited States\x1F4628910";
599     ///     let mut rdr = ReaderBuilder::new()
600     ///         .ascii()
601     ///         .from_reader(data.as_bytes());
602     ///
603     ///     if let Some(result) = rdr.records().next() {
604     ///         let record = result?;
605     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
606     ///         Ok(())
607     ///     } else {
608     ///         Err(From::from("expected at least one record but got none"))
609     ///     }
610     /// }
611     /// ```
ascii(&mut self) -> &mut ReaderBuilder612     pub fn ascii(&mut self) -> &mut ReaderBuilder {
613         self.builder.ascii();
614         self
615     }
616 
617     /// Set the capacity (in bytes) of the buffer used in the CSV reader.
618     /// This defaults to a reasonable setting.
buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder619     pub fn buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder {
620         self.capacity = capacity;
621         self
622     }
623 
624     /// Enable or disable the NFA for parsing CSV.
625     ///
626     /// This is intended to be a debug option. The NFA is always slower than
627     /// the DFA.
628     #[doc(hidden)]
nfa(&mut self, yes: bool) -> &mut ReaderBuilder629     pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
630         self.builder.nfa(yes);
631         self
632     }
633 }
634 
635 /// A already configured CSV reader.
636 ///
637 /// A CSV reader takes as input CSV data and transforms that into standard Rust
638 /// values. The most flexible way to read CSV data is as a sequence of records,
639 /// where a record is a sequence of fields and each field is a string. However,
640 /// a reader can also deserialize CSV data into Rust types like `i64` or
641 /// `(String, f64, f64, f64)` or even a custom struct automatically using
642 /// Serde.
643 ///
644 /// # Configuration
645 ///
646 /// A CSV reader has a couple convenient constructor methods like `from_path`
647 /// and `from_reader`. However, if you want to configure the CSV reader to use
648 /// a different delimiter or quote character (among many other things), then
649 /// you should use a [`ReaderBuilder`](struct.ReaderBuilder.html) to construct
650 /// a `Reader`. For example, to change the field delimiter:
651 ///
652 /// ```
653 /// use std::error::Error;
654 /// use csv::ReaderBuilder;
655 ///
656 /// # fn main() { example().unwrap(); }
657 /// fn example() -> Result<(), Box<dyn Error>> {
658 ///     let data = "\
659 /// city;country;pop
660 /// Boston;United States;4628910
661 /// ";
662 ///     let mut rdr = ReaderBuilder::new()
663 ///         .delimiter(b';')
664 ///         .from_reader(data.as_bytes());
665 ///
666 ///     if let Some(result) = rdr.records().next() {
667 ///         let record = result?;
668 ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
669 ///         Ok(())
670 ///     } else {
671 ///         Err(From::from("expected at least one record but got none"))
672 ///     }
673 /// }
674 /// ```
675 ///
676 /// # Error handling
677 ///
678 /// In general, CSV *parsing* does not ever return an error. That is, there is
679 /// no such thing as malformed CSV data. Instead, this reader will prioritize
680 /// finding a parse over rejecting CSV data that it does not understand. This
681 /// choice was inspired by other popular CSV parsers, but also because it is
682 /// pragmatic. CSV data varies wildly, so even if the CSV data is malformed,
683 /// it might still be possible to work with the data. In the land of CSV, there
684 /// is no "right" or "wrong," only "right" and "less right."
685 ///
686 /// With that said, a number of errors can occur while reading CSV data:
687 ///
688 /// * By default, all records in CSV data must have the same number of fields.
689 ///   If a record is found with a different number of fields than a prior
690 ///   record, then an error is returned. This behavior can be disabled by
691 ///   enabling flexible parsing via the `flexible` method on
692 ///   [`ReaderBuilder`](struct.ReaderBuilder.html).
693 /// * When reading CSV data from a resource (like a file), it is possible for
694 ///   reading from the underlying resource to fail. This will return an error.
695 ///   For subsequent calls to the `Reader` after encountering a such error
696 ///   (unless `seek` is used), it will behave as if end of file had been
697 ///   reached, in order to avoid running into infinite loops when still
698 ///   attempting to read the next record when one has errored.
699 /// * When reading CSV data into `String` or `&str` fields (e.g., via a
700 ///   [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
701 ///   enforced. If CSV data is invalid UTF-8, then an error is returned. If
702 ///   you want to read invalid UTF-8, then you should use the byte oriented
703 ///   APIs such as [`ByteRecord`](struct.ByteRecord.html). If you need explicit
704 ///   support for another encoding entirely, then you'll need to use another
705 ///   crate to transcode your CSV data to UTF-8 before parsing it.
706 /// * When using Serde to deserialize CSV data into Rust types, it is possible
707 ///   for a number of additional errors to occur. For example, deserializing
708 ///   a field `xyz` into an `i32` field will result in an error.
709 ///
710 /// For more details on the precise semantics of errors, see the
711 /// [`Error`](enum.Error.html) type.
712 #[derive(Debug)]
713 pub struct Reader<R> {
714     /// The underlying CSV parser.
715     ///
716     /// We explicitly put this on the heap because CoreReader embeds an entire
717     /// DFA transition table, which along with other things, tallies up to
718     /// almost 500 bytes on the stack.
719     core: Box<CoreReader>,
720     /// The underlying reader.
721     rdr: io::BufReader<R>,
722     /// Various state tracking.
723     ///
724     /// There is more state embedded in the `CoreReader`.
725     state: ReaderState,
726 }
727 
728 #[derive(Debug)]
729 struct ReaderState {
730     /// When set, this contains the first row of any parsed CSV data.
731     ///
732     /// This is always populated, regardless of whether `has_headers` is set.
733     headers: Option<Headers>,
734     /// When set, the first row of parsed CSV data is excluded from things
735     /// that read records, like iterators and `read_record`.
736     has_headers: bool,
737     /// When set, there is no restriction on the length of records. When not
738     /// set, every record must have the same number of fields, or else an error
739     /// is reported.
740     flexible: bool,
741     trim: Trim,
742     /// The number of fields in the first record parsed.
743     first_field_count: Option<u64>,
744     /// The current position of the parser.
745     ///
746     /// Note that this position is only observable by callers at the start
747     /// of a record. More granular positions are not supported.
748     cur_pos: Position,
749     /// Whether the first record has been read or not.
750     first: bool,
751     /// Whether the reader has been seeked or not.
752     seeked: bool,
753     /// Whether EOF of the underlying reader has been reached or not.
754     ///
755     /// IO errors on the underlying reader will be considered as an EOF for
756     /// subsequent read attempts, as it would be incorrect to keep on trying
757     /// to read when the underlying reader has broken.
758     ///
759     /// For clarity, having the best `Debug` impl and in case they need to be
760     /// treated differently at some point, we store whether the `EOF` is
761     /// considered because an actual EOF happened, or because we encoundered
762     /// an IO error.
763     /// This has no additional runtime cost.
764     eof: ReaderEofState,
765 }
766 
/// Whether EOF of the underlying reader has been reached or not.
///
/// IO errors on the underlying reader will be considered as an EOF for
/// subsequent read attempts, as it would be incorrect to keep on trying
/// to read when the underlying reader has broken.
///
/// For clarity, having the best `Debug` impl and in case they need to be
/// treated differently at some point, we store whether the `EOF` is
/// considered because an actual EOF happened, or because we encountered
/// an IO error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReaderEofState {
    /// Neither EOF nor an IO error has been recorded.
    NotEof,
    /// An actual EOF happened on the underlying reader.
    Eof,
    /// An IO error was encountered; treated as EOF for subsequent reads.
    IOError,
}
783 
784 /// Headers encapsulates any data associated with the headers of CSV data.
785 ///
786 /// The headers always correspond to the first row.
787 #[derive(Debug)]
788 struct Headers {
789     /// The header, as raw bytes.
790     byte_record: ByteRecord,
791     /// The header, as valid UTF-8 (or a UTF-8 error).
792     string_record: result::Result<StringRecord, Utf8Error>,
793 }
794 
795 impl Reader<Reader<File>> {
796     /// Create a new CSV parser with a default configuration for the given
797     /// file path.
798     ///
799     /// To customize CSV parsing, use a `ReaderBuilder`.
800     ///
801     /// # Example
802     ///
803     /// ```no_run
804     /// use std::error::Error;
805     /// use csv::Reader;
806     ///
807     /// # fn main() { example().unwrap(); }
808     /// fn example() -> Result<(), Box<dyn Error>> {
809     ///     let mut rdr = Reader::from_path("foo.csv")?;
810     ///     for result in rdr.records() {
811     ///         let record = result?;
812     ///         println!("{:?}", record);
813     ///     }
814     ///     Ok(())
815     /// }
816     /// ```
from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>>817     pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
818         ReaderBuilder::new().from_path(path)
819     }
820 }
821 
822 impl<R: io::Read> Reader<R> {
823     /// Create a new CSV reader given a builder and a source of underlying
824     /// bytes.
new(builder: &ReaderBuilder, rdr: R) -> Reader<R>825     fn new(builder: &ReaderBuilder, rdr: R) -> Reader<R> {
826         Reader {
827             core: Box::new(builder.builder.build()),
828             rdr: io::BufReader::with_capacity(builder.capacity, rdr),
829             state: ReaderState {
830                 headers: None,
831                 has_headers: builder.has_headers,
832                 flexible: builder.flexible,
833                 trim: builder.trim,
834                 first_field_count: None,
835                 cur_pos: Position::new(),
836                 first: false,
837                 seeked: false,
838                 eof: ReaderEofState::NotEof,
839             },
840         }
841     }
842 
843     /// Create a new CSV parser with a default configuration for the given
844     /// reader.
845     ///
846     /// To customize CSV parsing, use a `ReaderBuilder`.
847     ///
848     /// # Example
849     ///
850     /// ```
851     /// use std::error::Error;
852     /// use csv::Reader;
853     ///
854     /// # fn main() { example().unwrap(); }
855     /// fn example() -> Result<(), Box<dyn Error>> {
856     ///     let data = "\
857     /// city,country,pop
858     /// Boston,United States,4628910
859     /// Concord,United States,42695
860     /// ";
861     ///     let mut rdr = Reader::from_reader(data.as_bytes());
862     ///     for result in rdr.records() {
863     ///         let record = result?;
864     ///         println!("{:?}", record);
865     ///     }
866     ///     Ok(())
867     /// }
868     /// ```
from_reader(rdr: R) -> Reader<R>869     pub fn from_reader(rdr: R) -> Reader<R> {
870         ReaderBuilder::new().from_reader(rdr)
871     }
872 
873     /// Returns a borrowed iterator over deserialized records.
874     ///
875     /// Each item yielded by this iterator is a `Result<D, Error>`.
876     /// Therefore, in order to access the record, callers must handle the
877     /// possibility of error (typically with `try!` or `?`).
878     ///
879     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
880     /// default), then this does not include the first record. Additionally,
881     /// if `has_headers` is enabled, then deserializing into a struct will
882     /// automatically align the values in each row to the fields of a struct
883     /// based on the header row.
884     ///
885     /// # Example
886     ///
887     /// This shows how to deserialize CSV data into normal Rust structs. The
888     /// fields of the header row are used to match up the values in each row
889     /// to the fields of the struct.
890     ///
891     /// ```
892     /// use std::error::Error;
893     ///
894     /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
895     /// struct Row {
896     ///     city: String,
897     ///     country: String,
898     ///     #[serde(rename = "popcount")]
899     ///     population: u64,
900     /// }
901     ///
902     /// # fn main() { example().unwrap(); }
903     /// fn example() -> Result<(), Box<dyn Error>> {
904     ///     let data = "\
905     /// city,country,popcount
906     /// Boston,United States,4628910
907     /// ";
908     ///     let mut rdr = csv::Reader::from_reader(data.as_bytes());
909     ///     let mut iter = rdr.deserialize();
910     ///
911     ///     if let Some(result) = iter.next() {
912     ///         let record: Row = result?;
913     ///         assert_eq!(record, Row {
914     ///             city: "Boston".to_string(),
915     ///             country: "United States".to_string(),
916     ///             population: 4628910,
917     ///         });
918     ///         Ok(())
919     ///     } else {
920     ///         Err(From::from("expected at least one record but got none"))
921     ///     }
922     /// }
923     /// ```
924     ///
925     /// # Rules
926     ///
927     /// For the most part, any Rust type that maps straight-forwardly to a CSV
928     /// record is supported. This includes maps, structs, tuples and tuple
929     /// structs. Other Rust types, such as `Vec`s, arrays, and enums have
930     /// a more complicated story. In general, when working with CSV data, one
931     /// should avoid *nested sequences* as much as possible.
932     ///
933     /// Maps, structs, tuples and tuple structs map to CSV records in a simple
934     /// way. Tuples and tuple structs decode their fields in the order that
935     /// they are defined. Structs will do the same only if `has_headers` has
936     /// been disabled using [`ReaderBuilder`](struct.ReaderBuilder.html),
937     /// otherwise, structs and maps are deserialized based on the fields
938     /// defined in the header row. (If there is no header row, then
939     /// deserializing into a map will result in an error.)
940     ///
941     /// Nested sequences are supported in a limited capacity. Namely, they
942     /// are flattened. As a result, it's often useful to use a `Vec` to capture
943     /// a "tail" of fields in a record:
944     ///
945     /// ```
946     /// use std::error::Error;
947     ///
948     /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
949     /// struct Row {
950     ///     label: String,
951     ///     values: Vec<i32>,
952     /// }
953     ///
954     /// # fn main() { example().unwrap(); }
955     /// fn example() -> Result<(), Box<dyn Error>> {
956     ///     let data = "foo,1,2,3";
957     ///     let mut rdr = csv::ReaderBuilder::new()
958     ///         .has_headers(false)
959     ///         .from_reader(data.as_bytes());
960     ///     let mut iter = rdr.deserialize();
961     ///
962     ///     if let Some(result) = iter.next() {
963     ///         let record: Row = result?;
964     ///         assert_eq!(record, Row {
965     ///             label: "foo".to_string(),
966     ///             values: vec![1, 2, 3],
967     ///         });
968     ///         Ok(())
969     ///     } else {
970     ///         Err(From::from("expected at least one record but got none"))
971     ///     }
972     /// }
973     /// ```
974     ///
975     /// In the above example, adding another field to the `Row` struct after
976     /// the `values` field will result in a deserialization error. This is
977     /// because the deserializer doesn't know when to stop reading fields
978     /// into the `values` vector, so it will consume the rest of the fields in
979     /// the record leaving none left over for the additional field.
980     ///
981     /// Finally, simple enums in Rust can be deserialized as well. Namely,
982     /// enums must either be variants with no arguments or variants with a
983     /// single argument. Variants with no arguments are deserialized based on
984     /// which variant name the field matches. Variants with a single argument
985     /// are deserialized based on which variant can store the data. The latter
986     /// is only supported when using "untagged" enum deserialization. The
987     /// following example shows both forms in action:
988     ///
989     /// ```
990     /// use std::error::Error;
991     ///
992     /// #[derive(Debug, serde::Deserialize, PartialEq)]
993     /// struct Row {
994     ///     label: Label,
995     ///     value: Number,
996     /// }
997     ///
998     /// #[derive(Debug, serde::Deserialize, PartialEq)]
999     /// #[serde(rename_all = "lowercase")]
1000     /// enum Label {
1001     ///     Celsius,
1002     ///     Fahrenheit,
1003     /// }
1004     ///
1005     /// #[derive(Debug, serde::Deserialize, PartialEq)]
1006     /// #[serde(untagged)]
1007     /// enum Number {
1008     ///     Integer(i64),
1009     ///     Float(f64),
1010     /// }
1011     ///
1012     /// # fn main() { example().unwrap(); }
1013     /// fn example() -> Result<(), Box<dyn Error>> {
1014     ///     let data = "\
1015     /// label,value
1016     /// celsius,22.2222
1017     /// fahrenheit,72
1018     /// ";
1019     ///     let mut rdr = csv::Reader::from_reader(data.as_bytes());
1020     ///     let mut iter = rdr.deserialize();
1021     ///
1022     ///     // Read the first record.
1023     ///     if let Some(result) = iter.next() {
1024     ///         let record: Row = result?;
1025     ///         assert_eq!(record, Row {
1026     ///             label: Label::Celsius,
1027     ///             value: Number::Float(22.2222),
1028     ///         });
1029     ///     } else {
1030     ///         return Err(From::from(
1031     ///             "expected at least two records but got none"));
1032     ///     }
1033     ///
1034     ///     // Read the second record.
1035     ///     if let Some(result) = iter.next() {
1036     ///         let record: Row = result?;
1037     ///         assert_eq!(record, Row {
1038     ///             label: Label::Fahrenheit,
1039     ///             value: Number::Integer(72),
1040     ///         });
1041     ///         Ok(())
1042     ///     } else {
1043     ///         Err(From::from(
1044     ///             "expected at least two records but got only one"))
1045     ///     }
1046     /// }
1047     /// ```
deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D> where D: DeserializeOwned,1048     pub fn deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D>
1049     where
1050         D: DeserializeOwned,
1051     {
1052         DeserializeRecordsIter::new(self)
1053     }
1054 
1055     /// Returns an owned iterator over deserialized records.
1056     ///
1057     /// Each item yielded by this iterator is a `Result<D, Error>`.
1058     /// Therefore, in order to access the record, callers must handle the
1059     /// possibility of error (typically with `try!` or `?`).
1060     ///
1061     /// This is mostly useful when you want to return a CSV iterator or store
1062     /// it somewhere.
1063     ///
1064     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1065     /// default), then this does not include the first record. Additionally,
1066     /// if `has_headers` is enabled, then deserializing into a struct will
1067     /// automatically align the values in each row to the fields of a struct
1068     /// based on the header row.
1069     ///
1070     /// For more detailed deserialization rules, see the documentation on the
1071     /// `deserialize` method.
1072     ///
1073     /// # Example
1074     ///
1075     /// ```
1076     /// use std::error::Error;
1077     ///
1078     /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
1079     /// struct Row {
1080     ///     city: String,
1081     ///     country: String,
1082     ///     #[serde(rename = "popcount")]
1083     ///     population: u64,
1084     /// }
1085     ///
1086     /// # fn main() { example().unwrap(); }
1087     /// fn example() -> Result<(), Box<dyn Error>> {
1088     ///     let data = "\
1089     /// city,country,popcount
1090     /// Boston,United States,4628910
1091     /// ";
1092     ///     let rdr = csv::Reader::from_reader(data.as_bytes());
1093     ///     let mut iter = rdr.into_deserialize();
1094     ///
1095     ///     if let Some(result) = iter.next() {
1096     ///         let record: Row = result?;
1097     ///         assert_eq!(record, Row {
1098     ///             city: "Boston".to_string(),
1099     ///             country: "United States".to_string(),
1100     ///             population: 4628910,
1101     ///         });
1102     ///         Ok(())
1103     ///     } else {
1104     ///         Err(From::from("expected at least one record but got none"))
1105     ///     }
1106     /// }
1107     /// ```
into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D> where D: DeserializeOwned,1108     pub fn into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D>
1109     where
1110         D: DeserializeOwned,
1111     {
1112         DeserializeRecordsIntoIter::new(self)
1113     }
1114 
1115     /// Returns a borrowed iterator over all records as strings.
1116     ///
1117     /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
1118     /// Therefore, in order to access the record, callers must handle the
1119     /// possibility of error (typically with `try!` or `?`).
1120     ///
1121     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1122     /// default), then this does not include the first record.
1123     ///
1124     /// # Example
1125     ///
1126     /// ```
1127     /// use std::error::Error;
1128     /// use csv::Reader;
1129     ///
1130     /// # fn main() { example().unwrap(); }
1131     /// fn example() -> Result<(), Box<dyn Error>> {
1132     ///     let data = "\
1133     /// city,country,pop
1134     /// Boston,United States,4628910
1135     /// ";
1136     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1137     ///     let mut iter = rdr.records();
1138     ///
1139     ///     if let Some(result) = iter.next() {
1140     ///         let record = result?;
1141     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1142     ///         Ok(())
1143     ///     } else {
1144     ///         Err(From::from("expected at least one record but got none"))
1145     ///     }
1146     /// }
1147     /// ```
records(&mut self) -> StringRecordsIter<R>1148     pub fn records(&mut self) -> StringRecordsIter<R> {
1149         StringRecordsIter::new(self)
1150     }
1151 
1152     /// Returns an owned iterator over all records as strings.
1153     ///
1154     /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
1155     /// Therefore, in order to access the record, callers must handle the
1156     /// possibility of error (typically with `try!` or `?`).
1157     ///
1158     /// This is mostly useful when you want to return a CSV iterator or store
1159     /// it somewhere.
1160     ///
1161     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1162     /// default), then this does not include the first record.
1163     ///
1164     /// # Example
1165     ///
1166     /// ```
1167     /// use std::error::Error;
1168     /// use csv::Reader;
1169     ///
1170     /// # fn main() { example().unwrap(); }
1171     /// fn example() -> Result<(), Box<dyn Error>> {
1172     ///     let data = "\
1173     /// city,country,pop
1174     /// Boston,United States,4628910
1175     /// ";
1176     ///     let rdr = Reader::from_reader(data.as_bytes());
1177     ///     let mut iter = rdr.into_records();
1178     ///
1179     ///     if let Some(result) = iter.next() {
1180     ///         let record = result?;
1181     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1182     ///         Ok(())
1183     ///     } else {
1184     ///         Err(From::from("expected at least one record but got none"))
1185     ///     }
1186     /// }
1187     /// ```
into_records(self) -> StringRecordsIntoIter<R>1188     pub fn into_records(self) -> StringRecordsIntoIter<R> {
1189         StringRecordsIntoIter::new(self)
1190     }
1191 
1192     /// Returns a borrowed iterator over all records as raw bytes.
1193     ///
1194     /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
1195     /// Therefore, in order to access the record, callers must handle the
1196     /// possibility of error (typically with `try!` or `?`).
1197     ///
1198     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1199     /// default), then this does not include the first record.
1200     ///
1201     /// # Example
1202     ///
1203     /// ```
1204     /// use std::error::Error;
1205     /// use csv::Reader;
1206     ///
1207     /// # fn main() { example().unwrap(); }
1208     /// fn example() -> Result<(), Box<dyn Error>> {
1209     ///     let data = "\
1210     /// city,country,pop
1211     /// Boston,United States,4628910
1212     /// ";
1213     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1214     ///     let mut iter = rdr.byte_records();
1215     ///
1216     ///     if let Some(result) = iter.next() {
1217     ///         let record = result?;
1218     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1219     ///         Ok(())
1220     ///     } else {
1221     ///         Err(From::from("expected at least one record but got none"))
1222     ///     }
1223     /// }
1224     /// ```
byte_records(&mut self) -> ByteRecordsIter<R>1225     pub fn byte_records(&mut self) -> ByteRecordsIter<R> {
1226         ByteRecordsIter::new(self)
1227     }
1228 
1229     /// Returns an owned iterator over all records as raw bytes.
1230     ///
1231     /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
1232     /// Therefore, in order to access the record, callers must handle the
1233     /// possibility of error (typically with `try!` or `?`).
1234     ///
1235     /// This is mostly useful when you want to return a CSV iterator or store
1236     /// it somewhere.
1237     ///
1238     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1239     /// default), then this does not include the first record.
1240     ///
1241     /// # Example
1242     ///
1243     /// ```
1244     /// use std::error::Error;
1245     /// use csv::Reader;
1246     ///
1247     /// # fn main() { example().unwrap(); }
1248     /// fn example() -> Result<(), Box<dyn Error>> {
1249     ///     let data = "\
1250     /// city,country,pop
1251     /// Boston,United States,4628910
1252     /// ";
1253     ///     let rdr = Reader::from_reader(data.as_bytes());
1254     ///     let mut iter = rdr.into_byte_records();
1255     ///
1256     ///     if let Some(result) = iter.next() {
1257     ///         let record = result?;
1258     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1259     ///         Ok(())
1260     ///     } else {
1261     ///         Err(From::from("expected at least one record but got none"))
1262     ///     }
1263     /// }
1264     /// ```
into_byte_records(self) -> ByteRecordsIntoIter<R>1265     pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> {
1266         ByteRecordsIntoIter::new(self)
1267     }
1268 
1269     /// Returns a reference to the first row read by this parser.
1270     ///
1271     /// If no row has been read yet, then this will force parsing of the first
1272     /// row.
1273     ///
1274     /// If there was a problem parsing the row or if it wasn't valid UTF-8,
1275     /// then this returns an error.
1276     ///
1277     /// If the underlying reader emits EOF before any data, then this returns
1278     /// an empty record.
1279     ///
1280     /// Note that this method may be used regardless of whether `has_headers`
1281     /// was enabled (but it is enabled by default).
1282     ///
1283     /// # Example
1284     ///
1285     /// This example shows how to get the header row of CSV data. Notice that
1286     /// the header row does not appear as a record in the iterator!
1287     ///
1288     /// ```
1289     /// use std::error::Error;
1290     /// use csv::Reader;
1291     ///
1292     /// # fn main() { example().unwrap(); }
1293     /// fn example() -> Result<(), Box<dyn Error>> {
1294     ///     let data = "\
1295     /// city,country,pop
1296     /// Boston,United States,4628910
1297     /// ";
1298     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1299     ///
1300     ///     // We can read the headers before iterating.
1301     ///     {
1302     ///         // `headers` borrows from the reader, so we put this in its
1303     ///         // own scope. That way, the borrow ends before we try iterating
1304     ///         // below. Alternatively, we could clone the headers.
1305     ///         let headers = rdr.headers()?;
1306     ///         assert_eq!(headers, vec!["city", "country", "pop"]);
1307     ///     }
1308     ///
1309     ///     if let Some(result) = rdr.records().next() {
1310     ///         let record = result?;
1311     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1312     ///     } else {
1313     ///         return Err(From::from(
1314     ///             "expected at least one record but got none"))
1315     ///     }
1316     ///
1317     ///     // We can also read the headers after iterating.
1318     ///     let headers = rdr.headers()?;
1319     ///     assert_eq!(headers, vec!["city", "country", "pop"]);
1320     ///     Ok(())
1321     /// }
1322     /// ```
headers(&mut self) -> Result<&StringRecord>1323     pub fn headers(&mut self) -> Result<&StringRecord> {
1324         if self.state.headers.is_none() {
1325             let mut record = ByteRecord::new();
1326             self.read_byte_record_impl(&mut record)?;
1327             self.set_headers_impl(Err(record));
1328         }
1329         let headers = self.state.headers.as_ref().unwrap();
1330         match headers.string_record {
1331             Ok(ref record) => Ok(record),
1332             Err(ref err) => Err(Error::new(ErrorKind::Utf8 {
1333                 pos: headers.byte_record.position().map(Clone::clone),
1334                 err: err.clone(),
1335             })),
1336         }
1337     }
1338 
1339     /// Returns a reference to the first row read by this parser as raw bytes.
1340     ///
1341     /// If no row has been read yet, then this will force parsing of the first
1342     /// row.
1343     ///
1344     /// If there was a problem parsing the row then this returns an error.
1345     ///
1346     /// If the underlying reader emits EOF before any data, then this returns
1347     /// an empty record.
1348     ///
1349     /// Note that this method may be used regardless of whether `has_headers`
1350     /// was enabled (but it is enabled by default).
1351     ///
1352     /// # Example
1353     ///
1354     /// This example shows how to get the header row of CSV data. Notice that
1355     /// the header row does not appear as a record in the iterator!
1356     ///
1357     /// ```
1358     /// use std::error::Error;
1359     /// use csv::Reader;
1360     ///
1361     /// # fn main() { example().unwrap(); }
1362     /// fn example() -> Result<(), Box<dyn Error>> {
1363     ///     let data = "\
1364     /// city,country,pop
1365     /// Boston,United States,4628910
1366     /// ";
1367     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1368     ///
1369     ///     // We can read the headers before iterating.
1370     ///     {
1371     ///         // `headers` borrows from the reader, so we put this in its
1372     ///         // own scope. That way, the borrow ends before we try iterating
1373     ///         // below. Alternatively, we could clone the headers.
1374     ///         let headers = rdr.byte_headers()?;
1375     ///         assert_eq!(headers, vec!["city", "country", "pop"]);
1376     ///     }
1377     ///
1378     ///     if let Some(result) = rdr.byte_records().next() {
1379     ///         let record = result?;
1380     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1381     ///     } else {
1382     ///         return Err(From::from(
1383     ///             "expected at least one record but got none"))
1384     ///     }
1385     ///
1386     ///     // We can also read the headers after iterating.
1387     ///     let headers = rdr.byte_headers()?;
1388     ///     assert_eq!(headers, vec!["city", "country", "pop"]);
1389     ///     Ok(())
1390     /// }
1391     /// ```
byte_headers(&mut self) -> Result<&ByteRecord>1392     pub fn byte_headers(&mut self) -> Result<&ByteRecord> {
1393         if self.state.headers.is_none() {
1394             let mut record = ByteRecord::new();
1395             self.read_byte_record_impl(&mut record)?;
1396             self.set_headers_impl(Err(record));
1397         }
1398         Ok(&self.state.headers.as_ref().unwrap().byte_record)
1399     }
1400 
1401     /// Set the headers of this CSV parser manually.
1402     ///
1403     /// This overrides any other setting (including `set_byte_headers`). Any
1404     /// automatic detection of headers is disabled. This may be called at any
1405     /// time.
1406     ///
1407     /// # Example
1408     ///
1409     /// ```
1410     /// use std::error::Error;
1411     /// use csv::{Reader, StringRecord};
1412     ///
1413     /// # fn main() { example().unwrap(); }
1414     /// fn example() -> Result<(), Box<dyn Error>> {
1415     ///     let data = "\
1416     /// city,country,pop
1417     /// Boston,United States,4628910
1418     /// ";
1419     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1420     ///
1421     ///     assert_eq!(rdr.headers()?, vec!["city", "country", "pop"]);
1422     ///     rdr.set_headers(StringRecord::from(vec!["a", "b", "c"]));
1423     ///     assert_eq!(rdr.headers()?, vec!["a", "b", "c"]);
1424     ///
1425     ///     Ok(())
1426     /// }
1427     /// ```
set_headers(&mut self, headers: StringRecord)1428     pub fn set_headers(&mut self, headers: StringRecord) {
1429         self.set_headers_impl(Ok(headers));
1430     }
1431 
1432     /// Set the headers of this CSV parser manually as raw bytes.
1433     ///
1434     /// This overrides any other setting (including `set_headers`). Any
1435     /// automatic detection of headers is disabled. This may be called at any
1436     /// time.
1437     ///
1438     /// # Example
1439     ///
1440     /// ```
1441     /// use std::error::Error;
1442     /// use csv::{Reader, ByteRecord};
1443     ///
1444     /// # fn main() { example().unwrap(); }
1445     /// fn example() -> Result<(), Box<dyn Error>> {
1446     ///     let data = "\
1447     /// city,country,pop
1448     /// Boston,United States,4628910
1449     /// ";
1450     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1451     ///
1452     ///     assert_eq!(rdr.byte_headers()?, vec!["city", "country", "pop"]);
1453     ///     rdr.set_byte_headers(ByteRecord::from(vec!["a", "b", "c"]));
1454     ///     assert_eq!(rdr.byte_headers()?, vec!["a", "b", "c"]);
1455     ///
1456     ///     Ok(())
1457     /// }
1458     /// ```
set_byte_headers(&mut self, headers: ByteRecord)1459     pub fn set_byte_headers(&mut self, headers: ByteRecord) {
1460         self.set_headers_impl(Err(headers));
1461     }
1462 
set_headers_impl( &mut self, headers: result::Result<StringRecord, ByteRecord>, )1463     fn set_headers_impl(
1464         &mut self,
1465         headers: result::Result<StringRecord, ByteRecord>,
1466     ) {
1467         // If we have string headers, then get byte headers. But if we have
1468         // byte headers, then get the string headers (or a UTF-8 error).
1469         let (mut str_headers, mut byte_headers) = match headers {
1470             Ok(string) => {
1471                 let bytes = string.clone().into_byte_record();
1472                 (Ok(string), bytes)
1473             }
1474             Err(bytes) => {
1475                 match StringRecord::from_byte_record(bytes.clone()) {
1476                     Ok(str_headers) => (Ok(str_headers), bytes),
1477                     Err(err) => (Err(err.utf8_error().clone()), bytes),
1478                 }
1479             }
1480         };
1481         if self.state.trim.should_trim_headers() {
1482             if let Ok(ref mut str_headers) = str_headers.as_mut() {
1483                 str_headers.trim();
1484             }
1485             byte_headers.trim();
1486         }
1487         self.state.headers = Some(Headers {
1488             byte_record: byte_headers,
1489             string_record: str_headers,
1490         });
1491     }
1492 
1493     /// Read a single row into the given record. Returns false when no more
1494     /// records could be read.
1495     ///
1496     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1497     /// default), then this will never read the first record.
1498     ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
1501     /// caller to reuse the `StringRecord` allocation, which usually results
1502     /// in higher throughput.
1503     ///
1504     /// Records read via this method are guaranteed to have a position set
1505     /// on them, even if the reader is at EOF or if an error is returned.
1506     ///
1507     /// # Example
1508     ///
1509     /// ```
1510     /// use std::error::Error;
1511     /// use csv::{Reader, StringRecord};
1512     ///
1513     /// # fn main() { example().unwrap(); }
1514     /// fn example() -> Result<(), Box<dyn Error>> {
1515     ///     let data = "\
1516     /// city,country,pop
1517     /// Boston,United States,4628910
1518     /// ";
1519     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1520     ///     let mut record = StringRecord::new();
1521     ///
1522     ///     if rdr.read_record(&mut record)? {
1523     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1524     ///         Ok(())
1525     ///     } else {
1526     ///         Err(From::from("expected at least one record but got none"))
1527     ///     }
1528     /// }
1529     /// ```
read_record(&mut self, record: &mut StringRecord) -> Result<bool>1530     pub fn read_record(&mut self, record: &mut StringRecord) -> Result<bool> {
1531         let result = record.read(self);
1532         // We need to trim again because trimming string records includes
1533         // Unicode whitespace. (ByteRecord trimming only includes ASCII
1534         // whitespace.)
1535         if self.state.trim.should_trim_fields() {
1536             record.trim();
1537         }
1538         result
1539     }
1540 
1541     /// Read a single row into the given byte record. Returns false when no
1542     /// more records could be read.
1543     ///
1544     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1545     /// default), then this will never read the first record.
1546     ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
1549     /// caller to reuse the `ByteRecord` allocation, which usually results
1550     /// in higher throughput.
1551     ///
1552     /// Records read via this method are guaranteed to have a position set
1553     /// on them, even if the reader is at EOF or if an error is returned.
1554     ///
1555     /// # Example
1556     ///
1557     /// ```
1558     /// use std::error::Error;
1559     /// use csv::{ByteRecord, Reader};
1560     ///
1561     /// # fn main() { example().unwrap(); }
1562     /// fn example() -> Result<(), Box<dyn Error>> {
1563     ///     let data = "\
1564     /// city,country,pop
1565     /// Boston,United States,4628910
1566     /// ";
1567     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1568     ///     let mut record = ByteRecord::new();
1569     ///
1570     ///     if rdr.read_byte_record(&mut record)? {
1571     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1572     ///         Ok(())
1573     ///     } else {
1574     ///         Err(From::from("expected at least one record but got none"))
1575     ///     }
1576     /// }
1577     /// ```
    pub fn read_byte_record(
        &mut self,
        record: &mut ByteRecord,
    ) -> Result<bool> {
        if !self.state.seeked && !self.state.has_headers && !self.state.first {
            // If the caller indicated "no headers" and we haven't yielded the
            // first record yet, then we should yield our header row if we have
            // one.
            if let Some(ref headers) = self.state.headers {
                // Mark the first record as handled so this branch is not
                // taken again on the next call.
                self.state.first = true;
                record.clone_from(&headers.byte_record);
                if self.state.trim.should_trim_fields() {
                    record.trim();
                }
                // An empty stored row is reported as "no record".
                return Ok(!record.is_empty());
            }
        }
        // Read the next raw record. An error here is returned immediately,
        // before any of the header bookkeeping below runs.
        let ok = self.read_byte_record_impl(record)?;
        self.state.first = true;
        // No headers stored yet and no seek has happened: keep a copy of the
        // record that was just read as the header row.
        if !self.state.seeked && self.state.headers.is_none() {
            self.set_headers_impl(Err(record.clone()));
            // If the end user indicated that we have headers, then we should
            // never return the first row. Instead, we should attempt to
            // read and return the next one.
            if self.state.has_headers {
                let result = self.read_byte_record_impl(record);
                // Field trimming is applied here even when `result` is an
                // error; the result itself is returned untouched.
                if self.state.trim.should_trim_fields() {
                    record.trim();
                }
                return result;
            }
        }
        // Ordinary path: trim the fields of the record just read when field
        // trimming is enabled, then report whether a record was read.
        if self.state.trim.should_trim_fields() {
            record.trim();
        }
        Ok(ok)
    }
1615 
1616     /// Read a byte record from the underlying CSV reader, without accounting
1617     /// for headers.
1618     #[inline(always)]
read_byte_record_impl( &mut self, record: &mut ByteRecord, ) -> Result<bool>1619     fn read_byte_record_impl(
1620         &mut self,
1621         record: &mut ByteRecord,
1622     ) -> Result<bool> {
1623         use csv_core::ReadRecordResult::*;
1624 
1625         record.clear();
1626         record.set_position(Some(self.state.cur_pos.clone()));
1627         if self.state.eof != ReaderEofState::NotEof {
1628             return Ok(false);
1629         }
1630         let (mut outlen, mut endlen) = (0, 0);
1631         loop {
1632             let (res, nin, nout, nend) = {
1633                 let input_res = self.rdr.fill_buf();
1634                 if input_res.is_err() {
1635                     self.state.eof = ReaderEofState::IOError;
1636                 }
1637                 let input = input_res?;
1638                 let (fields, ends) = record.as_parts();
1639                 self.core.read_record(
1640                     input,
1641                     &mut fields[outlen..],
1642                     &mut ends[endlen..],
1643                 )
1644             };
1645             self.rdr.consume(nin);
1646             let byte = self.state.cur_pos.byte();
1647             self.state
1648                 .cur_pos
1649                 .set_byte(byte + nin as u64)
1650                 .set_line(self.core.line());
1651             outlen += nout;
1652             endlen += nend;
1653             match res {
1654                 InputEmpty => continue,
1655                 OutputFull => {
1656                     record.expand_fields();
1657                     continue;
1658                 }
1659                 OutputEndsFull => {
1660                     record.expand_ends();
1661                     continue;
1662                 }
1663                 Record => {
1664                     record.set_len(endlen);
1665                     self.state.add_record(record)?;
1666                     return Ok(true);
1667                 }
1668                 End => {
1669                     self.state.eof = ReaderEofState::Eof;
1670                     return Ok(false);
1671                 }
1672             }
1673         }
1674     }
1675 
1676     /// Return the current position of this CSV reader.
1677     ///
1678     /// The byte offset in the position returned can be used to `seek` this
1679     /// reader. In particular, seeking to a position returned here on the same
1680     /// data will result in parsing the same subsequent record.
1681     ///
1682     /// # Example: reading the position
1683     ///
1684     /// ```
1685     /// use std::{error::Error, io};
1686     /// use csv::{Reader, Position};
1687     ///
1688     /// # fn main() { example().unwrap(); }
1689     /// fn example() -> Result<(), Box<dyn Error>> {
1690     ///     let data = "\
1691     /// city,country,popcount
1692     /// Boston,United States,4628910
1693     /// Concord,United States,42695
1694     /// ";
1695     ///     let rdr = Reader::from_reader(io::Cursor::new(data));
1696     ///     let mut iter = rdr.into_records();
1697     ///     let mut pos = Position::new();
1698     ///     loop {
1699     ///         // Read the position immediately before each record.
1700     ///         let next_pos = iter.reader().position().clone();
1701     ///         if iter.next().is_none() {
1702     ///             break;
1703     ///         }
1704     ///         pos = next_pos;
1705     ///     }
1706     ///
1707     ///     // `pos` should now be the position immediately before the last
1708     ///     // record.
1709     ///     assert_eq!(pos.byte(), 51);
1710     ///     assert_eq!(pos.line(), 3);
1711     ///     assert_eq!(pos.record(), 2);
1712     ///     Ok(())
1713     /// }
1714     /// ```
position(&self) -> &Position1715     pub fn position(&self) -> &Position {
1716         &self.state.cur_pos
1717     }
1718 
1719     /// Returns true if and only if this reader has been exhausted.
1720     ///
1721     /// When this returns true, no more records can be read from this reader
1722     /// (unless it has been seeked to another position).
1723     ///
1724     /// # Example
1725     ///
1726     /// ```
1727     /// use std::{error::Error, io};
1728     /// use csv::{Reader, Position};
1729     ///
1730     /// # fn main() { example().unwrap(); }
1731     /// fn example() -> Result<(), Box<dyn Error>> {
1732     ///     let data = "\
1733     /// city,country,popcount
1734     /// Boston,United States,4628910
1735     /// Concord,United States,42695
1736     /// ";
1737     ///     let mut rdr = Reader::from_reader(io::Cursor::new(data));
1738     ///     assert!(!rdr.is_done());
1739     ///     for result in rdr.records() {
1740     ///         let _ = result?;
1741     ///     }
1742     ///     assert!(rdr.is_done());
1743     ///     Ok(())
1744     /// }
1745     /// ```
is_done(&self) -> bool1746     pub fn is_done(&self) -> bool {
1747         self.state.eof != ReaderEofState::NotEof
1748     }
1749 
1750     /// Returns true if and only if this reader has been configured to
1751     /// interpret the first record as a header record.
has_headers(&self) -> bool1752     pub fn has_headers(&self) -> bool {
1753         self.state.has_headers
1754     }
1755 
1756     /// Returns a reference to the underlying reader.
get_ref(&self) -> &R1757     pub fn get_ref(&self) -> &R {
1758         self.rdr.get_ref()
1759     }
1760 
1761     /// Returns a mutable reference to the underlying reader.
get_mut(&mut self) -> &mut R1762     pub fn get_mut(&mut self) -> &mut R {
1763         self.rdr.get_mut()
1764     }
1765 
1766     /// Unwraps this CSV reader, returning the underlying reader.
1767     ///
1768     /// Note that any leftover data inside this reader's internal buffer is
1769     /// lost.
into_inner(self) -> R1770     pub fn into_inner(self) -> R {
1771         self.rdr.into_inner()
1772     }
1773 }
1774 
1775 impl<R: io::Read + io::Seek> Reader<R> {
1776     /// Seeks the underlying reader to the position given.
1777     ///
1778     /// This comes with a few caveats:
1779     ///
1780     /// * Any internal buffer associated with this reader is cleared.
1781     /// * If the given position does not correspond to a position immediately
1782     ///   before the start of a record, then the behavior of this reader is
1783     ///   unspecified.
1784     /// * Any special logic that skips the first record in the CSV reader
1785     ///   when reading or iterating over records is disabled.
1786     ///
1787     /// If the given position has a byte offset equivalent to the current
1788     /// position, then no seeking is performed.
1789     ///
1790     /// If the header row has not already been read, then this will attempt
1791     /// to read the header row before seeking. Therefore, it is possible that
1792     /// this returns an error associated with reading CSV data.
1793     ///
1794     /// Note that seeking is performed based only on the byte offset in the
1795     /// given position. Namely, the record or line numbers in the position may
1796     /// be incorrect, but this will cause any future position generated by
1797     /// this CSV reader to be similarly incorrect.
1798     ///
1799     /// # Example: seek to parse a record twice
1800     ///
1801     /// ```
1802     /// use std::{error::Error, io};
1803     /// use csv::{Reader, Position};
1804     ///
1805     /// # fn main() { example().unwrap(); }
1806     /// fn example() -> Result<(), Box<dyn Error>> {
1807     ///     let data = "\
1808     /// city,country,popcount
1809     /// Boston,United States,4628910
1810     /// Concord,United States,42695
1811     /// ";
1812     ///     let rdr = Reader::from_reader(io::Cursor::new(data));
1813     ///     let mut iter = rdr.into_records();
1814     ///     let mut pos = Position::new();
1815     ///     loop {
1816     ///         // Read the position immediately before each record.
1817     ///         let next_pos = iter.reader().position().clone();
1818     ///         if iter.next().is_none() {
1819     ///             break;
1820     ///         }
1821     ///         pos = next_pos;
1822     ///     }
1823     ///
1824     ///     // Now seek the reader back to `pos`. This will let us read the
1825     ///     // last record again.
1826     ///     iter.reader_mut().seek(pos)?;
1827     ///     let mut iter = iter.into_reader().into_records();
1828     ///     if let Some(result) = iter.next() {
1829     ///         let record = result?;
1830     ///         assert_eq!(record, vec!["Concord", "United States", "42695"]);
1831     ///         Ok(())
1832     ///     } else {
1833     ///         Err(From::from("expected at least one record but got none"))
1834     ///     }
1835     /// }
1836     /// ```
seek(&mut self, pos: Position) -> Result<()>1837     pub fn seek(&mut self, pos: Position) -> Result<()> {
1838         self.byte_headers()?;
1839         self.state.seeked = true;
1840         if pos.byte() == self.state.cur_pos.byte() {
1841             return Ok(());
1842         }
1843         self.rdr.seek(io::SeekFrom::Start(pos.byte()))?;
1844         self.core.reset();
1845         self.core.set_line(pos.line());
1846         self.state.cur_pos = pos;
1847         self.state.eof = ReaderEofState::NotEof;
1848         Ok(())
1849     }
1850 
1851     /// This is like `seek`, but provides direct control over how the seeking
1852     /// operation is performed via `io::SeekFrom`.
1853     ///
1854     /// The `pos` position given *should* correspond the position indicated
1855     /// by `seek_from`, but there is no requirement. If the `pos` position
1856     /// given is incorrect, then the position information returned by this
1857     /// reader will be similarly incorrect.
1858     ///
1859     /// If the header row has not already been read, then this will attempt
1860     /// to read the header row before seeking. Therefore, it is possible that
1861     /// this returns an error associated with reading CSV data.
1862     ///
1863     /// Unlike `seek`, this will always cause an actual seek to be performed.
seek_raw( &mut self, seek_from: io::SeekFrom, pos: Position, ) -> Result<()>1864     pub fn seek_raw(
1865         &mut self,
1866         seek_from: io::SeekFrom,
1867         pos: Position,
1868     ) -> Result<()> {
1869         self.byte_headers()?;
1870         self.state.seeked = true;
1871         self.rdr.seek(seek_from)?;
1872         self.core.reset();
1873         self.core.set_line(pos.line());
1874         self.state.cur_pos = pos;
1875         self.state.eof = ReaderEofState::NotEof;
1876         Ok(())
1877     }
1878 }
1879 
1880 impl ReaderState {
1881     #[inline(always)]
add_record(&mut self, record: &ByteRecord) -> Result<()>1882     fn add_record(&mut self, record: &ByteRecord) -> Result<()> {
1883         let i = self.cur_pos.record();
1884         self.cur_pos.set_record(i.checked_add(1).unwrap());
1885         if !self.flexible {
1886             match self.first_field_count {
1887                 None => self.first_field_count = Some(record.len() as u64),
1888                 Some(expected) => {
1889                     if record.len() as u64 != expected {
1890                         return Err(Error::new(ErrorKind::UnequalLengths {
1891                             pos: record.position().map(Clone::clone),
1892                             expected_len: expected,
1893                             len: record.len() as u64,
1894                         }));
1895                     }
1896                 }
1897             }
1898         }
1899         Ok(())
1900     }
1901 }
1902 
/// An owned iterator over deserialized records.
///
/// The type parameter `R` refers to the underlying `io::Read` type, and `D`
/// refers to the type that this iterator will deserialize a record into.
pub struct DeserializeRecordsIntoIter<R, D> {
    /// The CSV reader that records are pulled from.
    rdr: Reader<R>,
    /// Scratch record that each call to `next` reads into before
    /// deserializing.
    rec: StringRecord,
    /// Copy of the header row handed to deserialization. `None` when the
    /// reader has no headers or the headers could not be read.
    headers: Option<StringRecord>,
    /// Marker tying the iterator to its output type `D`.
    _priv: PhantomData<D>,
}
1913 
1914 impl<R: io::Read, D: DeserializeOwned> DeserializeRecordsIntoIter<R, D> {
new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D>1915     fn new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D> {
1916         let headers = if !rdr.state.has_headers {
1917             None
1918         } else {
1919             rdr.headers().ok().map(Clone::clone)
1920         };
1921         DeserializeRecordsIntoIter {
1922             rdr,
1923             rec: StringRecord::new(),
1924             headers,
1925             _priv: PhantomData,
1926         }
1927     }
1928 
1929     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>1930     pub fn reader(&self) -> &Reader<R> {
1931         &self.rdr
1932     }
1933 
1934     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>1935     pub fn reader_mut(&mut self) -> &mut Reader<R> {
1936         &mut self.rdr
1937     }
1938 
1939     /// Drop this iterator and return the underlying CSV reader.
into_reader(self) -> Reader<R>1940     pub fn into_reader(self) -> Reader<R> {
1941         self.rdr
1942     }
1943 }
1944 
1945 impl<R: io::Read, D: DeserializeOwned> Iterator
1946     for DeserializeRecordsIntoIter<R, D>
1947 {
1948     type Item = Result<D>;
1949 
next(&mut self) -> Option<Result<D>>1950     fn next(&mut self) -> Option<Result<D>> {
1951         match self.rdr.read_record(&mut self.rec) {
1952             Err(err) => Some(Err(err)),
1953             Ok(false) => None,
1954             Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
1955         }
1956     }
1957 }
1958 
/// A borrowed iterator over deserialized records.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`. The type parameter `R` refers to the underlying `io::Read`
/// type, and `D` refers to the type that this iterator will deserialize a
/// record into.
pub struct DeserializeRecordsIter<'r, R: 'r, D> {
    /// The borrowed CSV reader that records are pulled from.
    rdr: &'r mut Reader<R>,
    /// Scratch record that each call to `next` reads into before
    /// deserializing.
    rec: StringRecord,
    /// Copy of the header row handed to deserialization. `None` when the
    /// reader has no headers or the headers could not be read.
    headers: Option<StringRecord>,
    /// Marker tying the iterator to its output type `D`.
    _priv: PhantomData<D>,
}
1971 
1972 impl<'r, R: io::Read, D: DeserializeOwned> DeserializeRecordsIter<'r, R, D> {
new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D>1973     fn new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D> {
1974         let headers = if !rdr.state.has_headers {
1975             None
1976         } else {
1977             rdr.headers().ok().map(Clone::clone)
1978         };
1979         DeserializeRecordsIter {
1980             rdr,
1981             rec: StringRecord::new(),
1982             headers,
1983             _priv: PhantomData,
1984         }
1985     }
1986 
1987     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>1988     pub fn reader(&self) -> &Reader<R> {
1989         &self.rdr
1990     }
1991 
1992     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>1993     pub fn reader_mut(&mut self) -> &mut Reader<R> {
1994         &mut self.rdr
1995     }
1996 }
1997 
1998 impl<'r, R: io::Read, D: DeserializeOwned> Iterator
1999     for DeserializeRecordsIter<'r, R, D>
2000 {
2001     type Item = Result<D>;
2002 
next(&mut self) -> Option<Result<D>>2003     fn next(&mut self) -> Option<Result<D>> {
2004         match self.rdr.read_record(&mut self.rec) {
2005             Err(err) => Some(Err(err)),
2006             Ok(false) => None,
2007             Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
2008         }
2009     }
2010 }
2011 
/// An owned iterator over records as strings.
pub struct StringRecordsIntoIter<R> {
    /// The CSV reader that records are pulled from.
    rdr: Reader<R>,
    /// Scratch record that each call to `next` reads into; yielded items
    /// are clones of it.
    rec: StringRecord,
}
2017 
2018 impl<R: io::Read> StringRecordsIntoIter<R> {
new(rdr: Reader<R>) -> StringRecordsIntoIter<R>2019     fn new(rdr: Reader<R>) -> StringRecordsIntoIter<R> {
2020         StringRecordsIntoIter { rdr, rec: StringRecord::new() }
2021     }
2022 
2023     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2024     pub fn reader(&self) -> &Reader<R> {
2025         &self.rdr
2026     }
2027 
2028     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2029     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2030         &mut self.rdr
2031     }
2032 
2033     /// Drop this iterator and return the underlying CSV reader.
into_reader(self) -> Reader<R>2034     pub fn into_reader(self) -> Reader<R> {
2035         self.rdr
2036     }
2037 }
2038 
2039 impl<R: io::Read> Iterator for StringRecordsIntoIter<R> {
2040     type Item = Result<StringRecord>;
2041 
next(&mut self) -> Option<Result<StringRecord>>2042     fn next(&mut self) -> Option<Result<StringRecord>> {
2043         match self.rdr.read_record(&mut self.rec) {
2044             Err(err) => Some(Err(err)),
2045             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2046             Ok(false) => None,
2047         }
2048     }
2049 }
2050 
/// A borrowed iterator over records as strings.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct StringRecordsIter<'r, R: 'r> {
    /// The borrowed CSV reader that records are pulled from.
    rdr: &'r mut Reader<R>,
    /// Scratch record that each call to `next` reads into; yielded items
    /// are clones of it.
    rec: StringRecord,
}
2059 
2060 impl<'r, R: io::Read> StringRecordsIter<'r, R> {
new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R>2061     fn new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R> {
2062         StringRecordsIter { rdr, rec: StringRecord::new() }
2063     }
2064 
2065     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2066     pub fn reader(&self) -> &Reader<R> {
2067         &self.rdr
2068     }
2069 
2070     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2071     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2072         &mut self.rdr
2073     }
2074 }
2075 
2076 impl<'r, R: io::Read> Iterator for StringRecordsIter<'r, R> {
2077     type Item = Result<StringRecord>;
2078 
next(&mut self) -> Option<Result<StringRecord>>2079     fn next(&mut self) -> Option<Result<StringRecord>> {
2080         match self.rdr.read_record(&mut self.rec) {
2081             Err(err) => Some(Err(err)),
2082             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2083             Ok(false) => None,
2084         }
2085     }
2086 }
2087 
/// An owned iterator over records as raw bytes.
pub struct ByteRecordsIntoIter<R> {
    /// The CSV reader that records are pulled from.
    rdr: Reader<R>,
    /// Scratch record that each call to `next` reads into; yielded items
    /// are clones of it.
    rec: ByteRecord,
}
2093 
2094 impl<R: io::Read> ByteRecordsIntoIter<R> {
new(rdr: Reader<R>) -> ByteRecordsIntoIter<R>2095     fn new(rdr: Reader<R>) -> ByteRecordsIntoIter<R> {
2096         ByteRecordsIntoIter { rdr, rec: ByteRecord::new() }
2097     }
2098 
2099     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2100     pub fn reader(&self) -> &Reader<R> {
2101         &self.rdr
2102     }
2103 
2104     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2105     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2106         &mut self.rdr
2107     }
2108 
2109     /// Drop this iterator and return the underlying CSV reader.
into_reader(self) -> Reader<R>2110     pub fn into_reader(self) -> Reader<R> {
2111         self.rdr
2112     }
2113 }
2114 
2115 impl<R: io::Read> Iterator for ByteRecordsIntoIter<R> {
2116     type Item = Result<ByteRecord>;
2117 
next(&mut self) -> Option<Result<ByteRecord>>2118     fn next(&mut self) -> Option<Result<ByteRecord>> {
2119         match self.rdr.read_byte_record(&mut self.rec) {
2120             Err(err) => Some(Err(err)),
2121             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2122             Ok(false) => None,
2123         }
2124     }
2125 }
2126 
/// A borrowed iterator over records as raw bytes.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct ByteRecordsIter<'r, R: 'r> {
    /// The borrowed CSV reader that records are pulled from.
    rdr: &'r mut Reader<R>,
    /// Scratch record that each call to `next` reads into; yielded items
    /// are clones of it.
    rec: ByteRecord,
}
2135 
2136 impl<'r, R: io::Read> ByteRecordsIter<'r, R> {
new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R>2137     fn new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R> {
2138         ByteRecordsIter { rdr, rec: ByteRecord::new() }
2139     }
2140 
2141     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2142     pub fn reader(&self) -> &Reader<R> {
2143         &self.rdr
2144     }
2145 
2146     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2147     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2148         &mut self.rdr
2149     }
2150 }
2151 
2152 impl<'r, R: io::Read> Iterator for ByteRecordsIter<'r, R> {
2153     type Item = Result<ByteRecord>;
2154 
next(&mut self) -> Option<Result<ByteRecord>>2155     fn next(&mut self) -> Option<Result<ByteRecord>> {
2156         match self.rdr.read_byte_record(&mut self.rec) {
2157             Err(err) => Some(Err(err)),
2158             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2159             Ok(false) => None,
2160         }
2161     }
2162 }
2163 
2164 #[cfg(test)]
2165 mod tests {
2166     use std::io;
2167 
2168     use crate::{
2169         byte_record::ByteRecord, error::ErrorKind, string_record::StringRecord,
2170     };
2171 
2172     use super::{Position, ReaderBuilder, Trim};
2173 
b(s: &str) -> &[u8]2174     fn b(s: &str) -> &[u8] {
2175         s.as_bytes()
2176     }
s(b: &[u8]) -> &str2177     fn s(b: &[u8]) -> &str {
2178         ::std::str::from_utf8(b).unwrap()
2179     }
2180 
newpos(byte: u64, line: u64, record: u64) -> Position2181     fn newpos(byte: u64, line: u64, record: u64) -> Position {
2182         let mut p = Position::new();
2183         p.set_byte(byte).set_line(line).set_record(record);
2184         p
2185     }
2186 
2187     #[test]
read_byte_record()2188     fn read_byte_record() {
2189         let data = b("foo,\"b,ar\",baz\nabc,mno,xyz");
2190         let mut rdr =
2191             ReaderBuilder::new().has_headers(false).from_reader(data);
2192         let mut rec = ByteRecord::new();
2193 
2194         assert!(rdr.read_byte_record(&mut rec).unwrap());
2195         assert_eq!(3, rec.len());
2196         assert_eq!("foo", s(&rec[0]));
2197         assert_eq!("b,ar", s(&rec[1]));
2198         assert_eq!("baz", s(&rec[2]));
2199 
2200         assert!(rdr.read_byte_record(&mut rec).unwrap());
2201         assert_eq!(3, rec.len());
2202         assert_eq!("abc", s(&rec[0]));
2203         assert_eq!("mno", s(&rec[1]));
2204         assert_eq!("xyz", s(&rec[2]));
2205 
2206         assert!(!rdr.read_byte_record(&mut rec).unwrap());
2207     }
2208 
2209     #[test]
read_trimmed_records_and_headers()2210     fn read_trimmed_records_and_headers() {
2211         let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
2212         let mut rdr = ReaderBuilder::new()
2213             .has_headers(true)
2214             .trim(Trim::All)
2215             .from_reader(data);
2216         let mut rec = ByteRecord::new();
2217         assert!(rdr.read_byte_record(&mut rec).unwrap());
2218         assert_eq!("1", s(&rec[0]));
2219         assert_eq!("2", s(&rec[1]));
2220         assert_eq!("3", s(&rec[2]));
2221         let mut rec = StringRecord::new();
2222         assert!(rdr.read_record(&mut rec).unwrap());
2223         assert_eq!("1", &rec[0]);
2224         assert_eq!("", &rec[1]);
2225         assert_eq!("3", &rec[2]);
2226         {
2227             let headers = rdr.headers().unwrap();
2228             assert_eq!(3, headers.len());
2229             assert_eq!("foo", &headers[0]);
2230             assert_eq!("bar", &headers[1]);
2231             assert_eq!("baz", &headers[2]);
2232         }
2233     }
2234 
2235     #[test]
read_trimmed_header()2236     fn read_trimmed_header() {
2237         let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
2238         let mut rdr = ReaderBuilder::new()
2239             .has_headers(true)
2240             .trim(Trim::Headers)
2241             .from_reader(data);
2242         let mut rec = ByteRecord::new();
2243         assert!(rdr.read_byte_record(&mut rec).unwrap());
2244         assert_eq!("  1", s(&rec[0]));
2245         assert_eq!("  2", s(&rec[1]));
2246         assert_eq!("  3", s(&rec[2]));
2247         {
2248             let headers = rdr.headers().unwrap();
2249             assert_eq!(3, headers.len());
2250             assert_eq!("foo", &headers[0]);
2251             assert_eq!("bar", &headers[1]);
2252             assert_eq!("baz", &headers[2]);
2253         }
2254     }
2255 
2256     #[test]
read_trimed_header_invalid_utf8()2257     fn read_trimed_header_invalid_utf8() {
2258         let data = &b"foo,  b\xFFar,\tbaz\na,b,c\nd,e,f"[..];
2259         let mut rdr = ReaderBuilder::new()
2260             .has_headers(true)
2261             .trim(Trim::Headers)
2262             .from_reader(data);
2263         let mut rec = StringRecord::new();
2264 
2265         // force the headers to be read
2266         let _ = rdr.read_record(&mut rec);
2267         // Check the byte headers are trimmed
2268         {
2269             let headers = rdr.byte_headers().unwrap();
2270             assert_eq!(3, headers.len());
2271             assert_eq!(b"foo", &headers[0]);
2272             assert_eq!(b"b\xFFar", &headers[1]);
2273             assert_eq!(b"baz", &headers[2]);
2274         }
2275         match *rdr.headers().unwrap_err().kind() {
2276             ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
2277                 assert_eq!(pos, &newpos(0, 1, 0));
2278                 assert_eq!(err.field(), 1);
2279                 assert_eq!(err.valid_up_to(), 3);
2280             }
2281             ref err => panic!("match failed, got {:?}", err),
2282         }
2283     }
2284 
2285     #[test]
read_trimmed_records()2286     fn read_trimmed_records() {
2287         let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
2288         let mut rdr = ReaderBuilder::new()
2289             .has_headers(true)
2290             .trim(Trim::Fields)
2291             .from_reader(data);
2292         let mut rec = ByteRecord::new();
2293         assert!(rdr.read_byte_record(&mut rec).unwrap());
2294         assert_eq!("1", s(&rec[0]));
2295         assert_eq!("2", s(&rec[1]));
2296         assert_eq!("3", s(&rec[2]));
2297         {
2298             let headers = rdr.headers().unwrap();
2299             assert_eq!(3, headers.len());
2300             assert_eq!("foo", &headers[0]);
2301             assert_eq!("  bar", &headers[1]);
2302             assert_eq!("\tbaz", &headers[2]);
2303         }
2304     }
2305 
2306     #[test]
read_trimmed_records_without_headers()2307     fn read_trimmed_records_without_headers() {
2308         let data = b("a1, b1\t,\t c1\t\n");
2309         let mut rdr = ReaderBuilder::new()
2310             .has_headers(false)
2311             .trim(Trim::All)
2312             .from_reader(data);
2313         let mut rec = ByteRecord::new();
2314         assert!(rdr.read_byte_record(&mut rec).unwrap());
2315         assert_eq!("a1", s(&rec[0]));
2316         assert_eq!("b1", s(&rec[1]));
2317         assert_eq!("c1", s(&rec[2]));
2318     }
2319 
2320     #[test]
read_record_unequal_fails()2321     fn read_record_unequal_fails() {
2322         let data = b("foo\nbar,baz");
2323         let mut rdr =
2324             ReaderBuilder::new().has_headers(false).from_reader(data);
2325         let mut rec = ByteRecord::new();
2326 
2327         assert!(rdr.read_byte_record(&mut rec).unwrap());
2328         assert_eq!(1, rec.len());
2329         assert_eq!("foo", s(&rec[0]));
2330 
2331         match rdr.read_byte_record(&mut rec) {
2332             Err(err) => match *err.kind() {
2333                 ErrorKind::UnequalLengths {
2334                     expected_len: 1,
2335                     ref pos,
2336                     len: 2,
2337                 } => {
2338                     assert_eq!(pos, &Some(newpos(4, 2, 1)));
2339                 }
2340                 ref wrong => panic!("match failed, got {:?}", wrong),
2341             },
2342             wrong => panic!("match failed, got {:?}", wrong),
2343         }
2344     }
2345 
2346     #[test]
read_record_unequal_ok()2347     fn read_record_unequal_ok() {
2348         let data = b("foo\nbar,baz");
2349         let mut rdr = ReaderBuilder::new()
2350             .has_headers(false)
2351             .flexible(true)
2352             .from_reader(data);
2353         let mut rec = ByteRecord::new();
2354 
2355         assert!(rdr.read_byte_record(&mut rec).unwrap());
2356         assert_eq!(1, rec.len());
2357         assert_eq!("foo", s(&rec[0]));
2358 
2359         assert!(rdr.read_byte_record(&mut rec).unwrap());
2360         assert_eq!(2, rec.len());
2361         assert_eq!("bar", s(&rec[0]));
2362         assert_eq!("baz", s(&rec[1]));
2363 
2364         assert!(!rdr.read_byte_record(&mut rec).unwrap());
2365     }
2366 
2367     // This tests that even if we get a CSV error, we can continue reading
2368     // if we want.
2369     #[test]
read_record_unequal_continue()2370     fn read_record_unequal_continue() {
2371         let data = b("foo\nbar,baz\nquux");
2372         let mut rdr =
2373             ReaderBuilder::new().has_headers(false).from_reader(data);
2374         let mut rec = ByteRecord::new();
2375 
2376         assert!(rdr.read_byte_record(&mut rec).unwrap());
2377         assert_eq!(1, rec.len());
2378         assert_eq!("foo", s(&rec[0]));
2379 
2380         match rdr.read_byte_record(&mut rec) {
2381             Err(err) => match err.kind() {
2382                 &ErrorKind::UnequalLengths {
2383                     expected_len: 1,
2384                     ref pos,
2385                     len: 2,
2386                 } => {
2387                     assert_eq!(pos, &Some(newpos(4, 2, 1)));
2388                 }
2389                 wrong => panic!("match failed, got {:?}", wrong),
2390             },
2391             wrong => panic!("match failed, got {:?}", wrong),
2392         }
2393 
2394         assert!(rdr.read_byte_record(&mut rec).unwrap());
2395         assert_eq!(1, rec.len());
2396         assert_eq!("quux", s(&rec[0]));
2397 
2398         assert!(!rdr.read_byte_record(&mut rec).unwrap());
2399     }
2400 
2401     #[test]
read_record_headers()2402     fn read_record_headers() {
2403         let data = b("foo,bar,baz\na,b,c\nd,e,f");
2404         let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
2405         let mut rec = StringRecord::new();
2406 
2407         assert!(rdr.read_record(&mut rec).unwrap());
2408         assert_eq!(3, rec.len());
2409         assert_eq!("a", &rec[0]);
2410 
2411         assert!(rdr.read_record(&mut rec).unwrap());
2412         assert_eq!(3, rec.len());
2413         assert_eq!("d", &rec[0]);
2414 
2415         assert!(!rdr.read_record(&mut rec).unwrap());
2416 
2417         {
2418             let headers = rdr.byte_headers().unwrap();
2419             assert_eq!(3, headers.len());
2420             assert_eq!(b"foo", &headers[0]);
2421             assert_eq!(b"bar", &headers[1]);
2422             assert_eq!(b"baz", &headers[2]);
2423         }
2424         {
2425             let headers = rdr.headers().unwrap();
2426             assert_eq!(3, headers.len());
2427             assert_eq!("foo", &headers[0]);
2428             assert_eq!("bar", &headers[1]);
2429             assert_eq!("baz", &headers[2]);
2430         }
2431     }
2432 
2433     #[test]
read_record_headers_invalid_utf8()2434     fn read_record_headers_invalid_utf8() {
2435         let data = &b"foo,b\xFFar,baz\na,b,c\nd,e,f"[..];
2436         let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
2437         let mut rec = StringRecord::new();
2438 
2439         assert!(rdr.read_record(&mut rec).unwrap());
2440         assert_eq!(3, rec.len());
2441         assert_eq!("a", &rec[0]);
2442 
2443         assert!(rdr.read_record(&mut rec).unwrap());
2444         assert_eq!(3, rec.len());
2445         assert_eq!("d", &rec[0]);
2446 
2447         assert!(!rdr.read_record(&mut rec).unwrap());
2448 
2449         // Check that we can read the headers as raw bytes, but that
2450         // if we read them as strings, we get an appropriate UTF-8 error.
2451         {
2452             let headers = rdr.byte_headers().unwrap();
2453             assert_eq!(3, headers.len());
2454             assert_eq!(b"foo", &headers[0]);
2455             assert_eq!(b"b\xFFar", &headers[1]);
2456             assert_eq!(b"baz", &headers[2]);
2457         }
2458         match *rdr.headers().unwrap_err().kind() {
2459             ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
2460                 assert_eq!(pos, &newpos(0, 1, 0));
2461                 assert_eq!(err.field(), 1);
2462                 assert_eq!(err.valid_up_to(), 1);
2463             }
2464             ref err => panic!("match failed, got {:?}", err),
2465         }
2466     }
2467 
2468     #[test]
read_record_no_headers_before()2469     fn read_record_no_headers_before() {
2470         let data = b("foo,bar,baz\na,b,c\nd,e,f");
2471         let mut rdr =
2472             ReaderBuilder::new().has_headers(false).from_reader(data);
2473         let mut rec = StringRecord::new();
2474 
2475         {
2476             let headers = rdr.headers().unwrap();
2477             assert_eq!(3, headers.len());
2478             assert_eq!("foo", &headers[0]);
2479             assert_eq!("bar", &headers[1]);
2480             assert_eq!("baz", &headers[2]);
2481         }
2482 
2483         assert!(rdr.read_record(&mut rec).unwrap());
2484         assert_eq!(3, rec.len());
2485         assert_eq!("foo", &rec[0]);
2486 
2487         assert!(rdr.read_record(&mut rec).unwrap());
2488         assert_eq!(3, rec.len());
2489         assert_eq!("a", &rec[0]);
2490 
2491         assert!(rdr.read_record(&mut rec).unwrap());
2492         assert_eq!(3, rec.len());
2493         assert_eq!("d", &rec[0]);
2494 
2495         assert!(!rdr.read_record(&mut rec).unwrap());
2496     }
2497 
2498     #[test]
read_record_no_headers_after()2499     fn read_record_no_headers_after() {
2500         let data = b("foo,bar,baz\na,b,c\nd,e,f");
2501         let mut rdr =
2502             ReaderBuilder::new().has_headers(false).from_reader(data);
2503         let mut rec = StringRecord::new();
2504 
2505         assert!(rdr.read_record(&mut rec).unwrap());
2506         assert_eq!(3, rec.len());
2507         assert_eq!("foo", &rec[0]);
2508 
2509         assert!(rdr.read_record(&mut rec).unwrap());
2510         assert_eq!(3, rec.len());
2511         assert_eq!("a", &rec[0]);
2512 
2513         assert!(rdr.read_record(&mut rec).unwrap());
2514         assert_eq!(3, rec.len());
2515         assert_eq!("d", &rec[0]);
2516 
2517         assert!(!rdr.read_record(&mut rec).unwrap());
2518 
2519         let headers = rdr.headers().unwrap();
2520         assert_eq!(3, headers.len());
2521         assert_eq!("foo", &headers[0]);
2522         assert_eq!("bar", &headers[1]);
2523         assert_eq!("baz", &headers[2]);
2524     }
2525 
2526     #[test]
seek()2527     fn seek() {
2528         let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
2529         let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
2530         rdr.seek(newpos(18, 3, 2)).unwrap();
2531 
2532         let mut rec = StringRecord::new();
2533 
2534         assert_eq!(18, rdr.position().byte());
2535         assert!(rdr.read_record(&mut rec).unwrap());
2536         assert_eq!(3, rec.len());
2537         assert_eq!("d", &rec[0]);
2538 
2539         assert_eq!(24, rdr.position().byte());
2540         assert_eq!(4, rdr.position().line());
2541         assert_eq!(3, rdr.position().record());
2542         assert!(rdr.read_record(&mut rec).unwrap());
2543         assert_eq!(3, rec.len());
2544         assert_eq!("g", &rec[0]);
2545 
2546         assert!(!rdr.read_record(&mut rec).unwrap());
2547     }
2548 
2549     // Test that we can read headers after seeking even if the headers weren't
2550     // explicit read before seeking.
2551     #[test]
seek_headers_after()2552     fn seek_headers_after() {
2553         let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
2554         let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
2555         rdr.seek(newpos(18, 3, 2)).unwrap();
2556         assert_eq!(rdr.headers().unwrap(), vec!["foo", "bar", "baz"]);
2557     }
2558 
2559     // Test that we can read headers after seeking if the headers were read
2560     // before seeking.
2561     #[test]
seek_headers_before_after()2562     fn seek_headers_before_after() {
2563         let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
2564         let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
2565         let headers = rdr.headers().unwrap().clone();
2566         rdr.seek(newpos(18, 3, 2)).unwrap();
2567         assert_eq!(&headers, rdr.headers().unwrap());
2568     }
2569 
2570     // Test that even if we didn't read headers before seeking, if we seek to
2571     // the current byte offset, then no seeking is done and therefore we can
2572     // still read headers after seeking.
2573     #[test]
seek_headers_no_actual_seek()2574     fn seek_headers_no_actual_seek() {
2575         let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
2576         let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
2577         rdr.seek(Position::new()).unwrap();
2578         assert_eq!("foo", &rdr.headers().unwrap()[0]);
2579     }
2580 
2581     // Test that position info is reported correctly in absence of headers.
2582     #[test]
positions_no_headers()2583     fn positions_no_headers() {
2584         let mut rdr = ReaderBuilder::new()
2585             .has_headers(false)
2586             .from_reader("a,b,c\nx,y,z".as_bytes())
2587             .into_records();
2588 
2589         let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
2590         assert_eq!(pos.byte(), 0);
2591         assert_eq!(pos.line(), 1);
2592         assert_eq!(pos.record(), 0);
2593 
2594         let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
2595         assert_eq!(pos.byte(), 6);
2596         assert_eq!(pos.line(), 2);
2597         assert_eq!(pos.record(), 1);
2598     }
2599 
2600     // Test that position info is reported correctly with headers.
2601     #[test]
positions_headers()2602     fn positions_headers() {
2603         let mut rdr = ReaderBuilder::new()
2604             .has_headers(true)
2605             .from_reader("a,b,c\nx,y,z".as_bytes())
2606             .into_records();
2607 
2608         let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
2609         assert_eq!(pos.byte(), 6);
2610         assert_eq!(pos.line(), 2);
2611         assert_eq!(pos.record(), 1);
2612     }
2613 
2614     // Test that reading headers on empty data yields an empty record.
2615     #[test]
headers_on_empty_data()2616     fn headers_on_empty_data() {
2617         let mut rdr = ReaderBuilder::new().from_reader("".as_bytes());
2618         let r = rdr.byte_headers().unwrap();
2619         assert_eq!(r.len(), 0);
2620     }
2621 
2622     // Test that reading the first record on empty data works.
2623     #[test]
no_headers_on_empty_data()2624     fn no_headers_on_empty_data() {
2625         let mut rdr =
2626             ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
2627         assert_eq!(rdr.records().count(), 0);
2628     }
2629 
2630     // Test that reading the first record on empty data works, even if
2631     // we've tried to read headers before hand.
2632     #[test]
no_headers_on_empty_data_after_headers()2633     fn no_headers_on_empty_data_after_headers() {
2634         let mut rdr =
2635             ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
2636         assert_eq!(rdr.headers().unwrap().len(), 0);
2637         assert_eq!(rdr.records().count(), 0);
2638     }
2639 }
2640