• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! CSV Virtual Table.
2 //!
3 //! Port of [csv](http://www.sqlite.org/cgi/src/finfo?name=ext/misc/csv.c) C
4 //! extension: `https://www.sqlite.org/csv.html`
5 //!
6 //! # Example
7 //!
8 //! ```rust,no_run
9 //! # use rusqlite::{Connection, Result};
10 //! fn example() -> Result<()> {
11 //!     // Note: This should be done once (usually when opening the DB).
12 //!     let db = Connection::open_in_memory()?;
13 //!     rusqlite::vtab::csvtab::load_module(&db)?;
14 //!     // Assume my_csv.csv
15 //!     let schema = "
16 //!         CREATE VIRTUAL TABLE my_csv_data
17 //!         USING csv(filename = 'my_csv.csv')
18 //!     ";
19 //!     db.execute_batch(schema)?;
20 //!     // Now the `my_csv_data` (virtual) table can be queried as normal...
21 //!     Ok(())
22 //! }
23 //! ```
24 use std::fs::File;
25 use std::marker::PhantomData;
26 use std::os::raw::c_int;
27 use std::path::Path;
28 use std::str;
29 
30 use crate::ffi;
31 use crate::types::Null;
32 use crate::vtab::{
33     escape_double_quote, parse_boolean, read_only_module, Context, CreateVTab, IndexInfo, VTab,
34     VTabConfig, VTabConnection, VTabCursor, VTabKind, Values,
35 };
36 use crate::{Connection, Error, Result};
37 
38 /// Register the "csv" module.
39 /// ```sql
40 /// CREATE VIRTUAL TABLE vtab USING csv(
41 ///   filename=FILENAME -- Name of file containing CSV content
42 ///   [, schema=SCHEMA] -- Alternative CSV schema. 'CREATE TABLE x(col1 TEXT NOT NULL, col2 INT, ...);'
43 ///   [, header=YES|NO] -- First row of CSV defines the names of columns if "yes". Default "no".
44 ///   [, columns=N] -- Assume the CSV file contains N columns.
45 ///   [, delimiter=C] -- CSV delimiter. Default ','.
46 ///   [, quote=C] -- CSV quote. Default '"'. 0 means no quote.
47 /// );
48 /// ```
load_module(conn: &Connection) -> Result<()>49 pub fn load_module(conn: &Connection) -> Result<()> {
50     let aux: Option<()> = None;
51     conn.create_module("csv", read_only_module::<CsvTab>(), aux)
52 }
53 
54 /// An instance of the CSV virtual table
55 #[repr(C)]
56 struct CsvTab {
57     /// Base class. Must be first
58     base: ffi::sqlite3_vtab,
59     /// Name of the CSV file
60     filename: String,
61     has_headers: bool,
62     delimiter: u8,
63     quote: u8,
64     /// Offset to start of data
65     offset_first_row: csv::Position,
66 }
67 
68 impl CsvTab {
reader(&self) -> Result<csv::Reader<File>, csv::Error>69     fn reader(&self) -> Result<csv::Reader<File>, csv::Error> {
70         csv::ReaderBuilder::new()
71             .has_headers(self.has_headers)
72             .delimiter(self.delimiter)
73             .quote(self.quote)
74             .from_path(&self.filename)
75     }
76 
parse_byte(arg: &str) -> Option<u8>77     fn parse_byte(arg: &str) -> Option<u8> {
78         if arg.len() == 1 {
79             arg.bytes().next()
80         } else {
81             None
82         }
83     }
84 }
85 
86 unsafe impl<'vtab> VTab<'vtab> for CsvTab {
87     type Aux = ();
88     type Cursor = CsvTabCursor<'vtab>;
89 
connect( db: &mut VTabConnection, _aux: Option<&()>, args: &[&[u8]], ) -> Result<(String, CsvTab)>90     fn connect(
91         db: &mut VTabConnection,
92         _aux: Option<&()>,
93         args: &[&[u8]],
94     ) -> Result<(String, CsvTab)> {
95         if args.len() < 4 {
96             return Err(Error::ModuleError("no CSV file specified".to_owned()));
97         }
98 
99         let mut vtab = CsvTab {
100             base: ffi::sqlite3_vtab::default(),
101             filename: "".to_owned(),
102             has_headers: false,
103             delimiter: b',',
104             quote: b'"',
105             offset_first_row: csv::Position::new(),
106         };
107         let mut schema = None;
108         let mut n_col = None;
109 
110         let args = &args[3..];
111         for c_slice in args {
112             let (param, value) = super::parameter(c_slice)?;
113             match param {
114                 "filename" => {
115                     if !Path::new(value).exists() {
116                         return Err(Error::ModuleError(format!("file '{value}' does not exist")));
117                     }
118                     value.clone_into(&mut vtab.filename);
119                 }
120                 "schema" => {
121                     schema = Some(value.to_owned());
122                 }
123                 "columns" => {
124                     if let Ok(n) = value.parse::<u16>() {
125                         if n_col.is_some() {
126                             return Err(Error::ModuleError(
127                                 "more than one 'columns' parameter".to_owned(),
128                             ));
129                         } else if n == 0 {
130                             return Err(Error::ModuleError(
131                                 "must have at least one column".to_owned(),
132                             ));
133                         }
134                         n_col = Some(n);
135                     } else {
136                         return Err(Error::ModuleError(format!(
137                             "unrecognized argument to 'columns': {value}"
138                         )));
139                     }
140                 }
141                 "header" => {
142                     if let Some(b) = parse_boolean(value) {
143                         vtab.has_headers = b;
144                     } else {
145                         return Err(Error::ModuleError(format!(
146                             "unrecognized argument to 'header': {value}"
147                         )));
148                     }
149                 }
150                 "delimiter" => {
151                     if let Some(b) = CsvTab::parse_byte(value) {
152                         vtab.delimiter = b;
153                     } else {
154                         return Err(Error::ModuleError(format!(
155                             "unrecognized argument to 'delimiter': {value}"
156                         )));
157                     }
158                 }
159                 "quote" => {
160                     if let Some(b) = CsvTab::parse_byte(value) {
161                         if b == b'0' {
162                             vtab.quote = 0;
163                         } else {
164                             vtab.quote = b;
165                         }
166                     } else {
167                         return Err(Error::ModuleError(format!(
168                             "unrecognized argument to 'quote': {value}"
169                         )));
170                     }
171                 }
172                 _ => {
173                     return Err(Error::ModuleError(format!(
174                         "unrecognized parameter '{param}'"
175                     )));
176                 }
177             }
178         }
179 
180         if vtab.filename.is_empty() {
181             return Err(Error::ModuleError("no CSV file specified".to_owned()));
182         }
183 
184         let mut cols: Vec<String> = Vec::new();
185         if vtab.has_headers || (n_col.is_none() && schema.is_none()) {
186             let mut reader = vtab.reader()?;
187             if vtab.has_headers {
188                 {
189                     let headers = reader.headers()?;
190                     // headers ignored if cols is not empty
191                     if n_col.is_none() && schema.is_none() {
192                         cols = headers
193                             .into_iter()
194                             .map(|header| escape_double_quote(header).into_owned())
195                             .collect();
196                     }
197                 }
198                 vtab.offset_first_row = reader.position().clone();
199             } else {
200                 let mut record = csv::ByteRecord::new();
201                 if reader.read_byte_record(&mut record)? {
202                     for (i, _) in record.iter().enumerate() {
203                         cols.push(format!("c{i}"));
204                     }
205                 }
206             }
207         } else if let Some(n_col) = n_col {
208             for i in 0..n_col {
209                 cols.push(format!("c{i}"));
210             }
211         }
212 
213         if cols.is_empty() && schema.is_none() {
214             return Err(Error::ModuleError("no column specified".to_owned()));
215         }
216 
217         if schema.is_none() {
218             let mut sql = String::from("CREATE TABLE x(");
219             for (i, col) in cols.iter().enumerate() {
220                 sql.push('"');
221                 sql.push_str(col);
222                 sql.push_str("\" TEXT");
223                 if i == cols.len() - 1 {
224                     sql.push_str(");");
225                 } else {
226                     sql.push_str(", ");
227                 }
228             }
229             schema = Some(sql);
230         }
231         db.config(VTabConfig::DirectOnly)?;
232         Ok((schema.unwrap(), vtab))
233     }
234 
235     // Only a forward full table scan is supported.
best_index(&self, info: &mut IndexInfo) -> Result<()>236     fn best_index(&self, info: &mut IndexInfo) -> Result<()> {
237         info.set_estimated_cost(1_000_000.);
238         Ok(())
239     }
240 
open(&mut self) -> Result<CsvTabCursor<'_>>241     fn open(&mut self) -> Result<CsvTabCursor<'_>> {
242         Ok(CsvTabCursor::new(self.reader()?))
243     }
244 }
245 
246 impl CreateVTab<'_> for CsvTab {
247     const KIND: VTabKind = VTabKind::Default;
248 }
249 
250 /// A cursor for the CSV virtual table
251 #[repr(C)]
252 struct CsvTabCursor<'vtab> {
253     /// Base class. Must be first
254     base: ffi::sqlite3_vtab_cursor,
255     /// The CSV reader object
256     reader: csv::Reader<File>,
257     /// Current cursor position used as rowid
258     row_number: usize,
259     /// Values of the current row
260     cols: csv::StringRecord,
261     eof: bool,
262     phantom: PhantomData<&'vtab CsvTab>,
263 }
264 
265 impl CsvTabCursor<'_> {
new<'vtab>(reader: csv::Reader<File>) -> CsvTabCursor<'vtab>266     fn new<'vtab>(reader: csv::Reader<File>) -> CsvTabCursor<'vtab> {
267         CsvTabCursor {
268             base: ffi::sqlite3_vtab_cursor::default(),
269             reader,
270             row_number: 0,
271             cols: csv::StringRecord::new(),
272             eof: false,
273             phantom: PhantomData,
274         }
275     }
276 
277     /// Accessor to the associated virtual table.
vtab(&self) -> &CsvTab278     fn vtab(&self) -> &CsvTab {
279         unsafe { &*(self.base.pVtab as *const CsvTab) }
280     }
281 }
282 
283 unsafe impl VTabCursor for CsvTabCursor<'_> {
284     // Only a full table scan is supported.  So `filter` simply rewinds to
285     // the beginning.
filter( &mut self, _idx_num: c_int, _idx_str: Option<&str>, _args: &Values<'_>, ) -> Result<()>286     fn filter(
287         &mut self,
288         _idx_num: c_int,
289         _idx_str: Option<&str>,
290         _args: &Values<'_>,
291     ) -> Result<()> {
292         {
293             let offset_first_row = self.vtab().offset_first_row.clone();
294             self.reader.seek(offset_first_row)?;
295         }
296         self.row_number = 0;
297         self.next()
298     }
299 
next(&mut self) -> Result<()>300     fn next(&mut self) -> Result<()> {
301         {
302             self.eof = self.reader.is_done();
303             if self.eof {
304                 return Ok(());
305             }
306 
307             self.eof = !self.reader.read_record(&mut self.cols)?;
308         }
309 
310         self.row_number += 1;
311         Ok(())
312     }
313 
eof(&self) -> bool314     fn eof(&self) -> bool {
315         self.eof
316     }
317 
column(&self, ctx: &mut Context, col: c_int) -> Result<()>318     fn column(&self, ctx: &mut Context, col: c_int) -> Result<()> {
319         if col < 0 || col as usize >= self.cols.len() {
320             return Err(Error::ModuleError(format!(
321                 "column index out of bounds: {col}"
322             )));
323         }
324         if self.cols.is_empty() {
325             return ctx.set_result(&Null);
326         }
327         // TODO Affinity
328         ctx.set_result(&self.cols[col as usize].to_owned())
329     }
330 
rowid(&self) -> Result<i64>331     fn rowid(&self) -> Result<i64> {
332         Ok(self.row_number as i64)
333     }
334 }
335 
336 impl From<csv::Error> for Error {
337     #[cold]
from(err: csv::Error) -> Error338     fn from(err: csv::Error) -> Error {
339         Error::ModuleError(err.to_string())
340     }
341 }
342 
343 #[cfg(test)]
344 mod test {
345     use crate::vtab::csvtab;
346     use crate::{Connection, Result};
347     use fallible_iterator::FallibleIterator;
348 
349     #[test]
test_csv_module() -> Result<()>350     fn test_csv_module() -> Result<()> {
351         let db = Connection::open_in_memory()?;
352         csvtab::load_module(&db)?;
353         db.execute_batch(
354             "CREATE VIRTUAL TABLE vtab USING csv(filename = 'test.csv', header = yes)",
355         )?;
356 
357         {
358             let mut s = db.prepare("SELECT rowid, * FROM vtab")?;
359             {
360                 let headers = s.column_names();
361                 assert_eq!(vec!["rowid", "colA", "colB", "colC"], headers);
362             }
363 
364             let ids: Result<Vec<i32>> = s.query([])?.map(|row| row.get::<_, i32>(0)).collect();
365             let sum = ids?.iter().sum::<i32>();
366             assert_eq!(sum, 15);
367         }
368         db.execute_batch("DROP TABLE vtab")
369     }
370 
371     #[test]
test_csv_cursor() -> Result<()>372     fn test_csv_cursor() -> Result<()> {
373         let db = Connection::open_in_memory()?;
374         csvtab::load_module(&db)?;
375         db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv(filename='test.csv', header=yes)")?;
376 
377         {
378             let mut s = db.prepare(
379                 "SELECT v1.rowid, v1.* FROM vtab v1 NATURAL JOIN vtab v2 WHERE \
380                      v1.rowid < v2.rowid",
381             )?;
382 
383             let mut rows = s.query([])?;
384             let row = rows.next()?.unwrap();
385             assert_eq!(row.get_unwrap::<_, i32>(0), 2);
386         }
387         db.execute_batch("DROP TABLE vtab")
388     }
389 }
390