1 //! CSV Virtual Table.
2 //!
3 //! Port of [csv](http://www.sqlite.org/cgi/src/finfo?name=ext/misc/csv.c) C
4 //! extension: `https://www.sqlite.org/csv.html`
5 //!
6 //! # Example
7 //!
8 //! ```rust,no_run
9 //! # use rusqlite::{Connection, Result};
10 //! fn example() -> Result<()> {
11 //! // Note: This should be done once (usually when opening the DB).
12 //! let db = Connection::open_in_memory()?;
13 //! rusqlite::vtab::csvtab::load_module(&db)?;
14 //! // Assume my_csv.csv
15 //! let schema = "
16 //! CREATE VIRTUAL TABLE my_csv_data
17 //! USING csv(filename = 'my_csv.csv')
18 //! ";
19 //! db.execute_batch(schema)?;
20 //! // Now the `my_csv_data` (virtual) table can be queried as normal...
21 //! Ok(())
22 //! }
23 //! ```
24 use std::fs::File;
25 use std::marker::PhantomData;
26 use std::os::raw::c_int;
27 use std::path::Path;
28 use std::str;
29
30 use crate::ffi;
31 use crate::types::Null;
32 use crate::vtab::{
33 escape_double_quote, parse_boolean, read_only_module, Context, CreateVTab, IndexInfo, VTab,
34 VTabConfig, VTabConnection, VTabCursor, VTabKind, Values,
35 };
36 use crate::{Connection, Error, Result};
37
38 /// Register the "csv" module.
39 /// ```sql
40 /// CREATE VIRTUAL TABLE vtab USING csv(
41 /// filename=FILENAME -- Name of file containing CSV content
42 /// [, schema=SCHEMA] -- Alternative CSV schema. 'CREATE TABLE x(col1 TEXT NOT NULL, col2 INT, ...);'
43 /// [, header=YES|NO] -- First row of CSV defines the names of columns if "yes". Default "no".
44 /// [, columns=N] -- Assume the CSV file contains N columns.
45 /// [, delimiter=C] -- CSV delimiter. Default ','.
46 /// [, quote=C] -- CSV quote. Default '"'. 0 means no quote.
47 /// );
48 /// ```
load_module(conn: &Connection) -> Result<()>49 pub fn load_module(conn: &Connection) -> Result<()> {
50 let aux: Option<()> = None;
51 conn.create_module("csv", read_only_module::<CsvTab>(), aux)
52 }
53
54 /// An instance of the CSV virtual table
55 #[repr(C)]
56 struct CsvTab {
57 /// Base class. Must be first
58 base: ffi::sqlite3_vtab,
59 /// Name of the CSV file
60 filename: String,
61 has_headers: bool,
62 delimiter: u8,
63 quote: u8,
64 /// Offset to start of data
65 offset_first_row: csv::Position,
66 }
67
68 impl CsvTab {
reader(&self) -> Result<csv::Reader<File>, csv::Error>69 fn reader(&self) -> Result<csv::Reader<File>, csv::Error> {
70 csv::ReaderBuilder::new()
71 .has_headers(self.has_headers)
72 .delimiter(self.delimiter)
73 .quote(self.quote)
74 .from_path(&self.filename)
75 }
76
parse_byte(arg: &str) -> Option<u8>77 fn parse_byte(arg: &str) -> Option<u8> {
78 if arg.len() == 1 {
79 arg.bytes().next()
80 } else {
81 None
82 }
83 }
84 }
85
86 unsafe impl<'vtab> VTab<'vtab> for CsvTab {
87 type Aux = ();
88 type Cursor = CsvTabCursor<'vtab>;
89
connect( db: &mut VTabConnection, _aux: Option<&()>, args: &[&[u8]], ) -> Result<(String, CsvTab)>90 fn connect(
91 db: &mut VTabConnection,
92 _aux: Option<&()>,
93 args: &[&[u8]],
94 ) -> Result<(String, CsvTab)> {
95 if args.len() < 4 {
96 return Err(Error::ModuleError("no CSV file specified".to_owned()));
97 }
98
99 let mut vtab = CsvTab {
100 base: ffi::sqlite3_vtab::default(),
101 filename: "".to_owned(),
102 has_headers: false,
103 delimiter: b',',
104 quote: b'"',
105 offset_first_row: csv::Position::new(),
106 };
107 let mut schema = None;
108 let mut n_col = None;
109
110 let args = &args[3..];
111 for c_slice in args {
112 let (param, value) = super::parameter(c_slice)?;
113 match param {
114 "filename" => {
115 if !Path::new(value).exists() {
116 return Err(Error::ModuleError(format!("file '{value}' does not exist")));
117 }
118 value.clone_into(&mut vtab.filename);
119 }
120 "schema" => {
121 schema = Some(value.to_owned());
122 }
123 "columns" => {
124 if let Ok(n) = value.parse::<u16>() {
125 if n_col.is_some() {
126 return Err(Error::ModuleError(
127 "more than one 'columns' parameter".to_owned(),
128 ));
129 } else if n == 0 {
130 return Err(Error::ModuleError(
131 "must have at least one column".to_owned(),
132 ));
133 }
134 n_col = Some(n);
135 } else {
136 return Err(Error::ModuleError(format!(
137 "unrecognized argument to 'columns': {value}"
138 )));
139 }
140 }
141 "header" => {
142 if let Some(b) = parse_boolean(value) {
143 vtab.has_headers = b;
144 } else {
145 return Err(Error::ModuleError(format!(
146 "unrecognized argument to 'header': {value}"
147 )));
148 }
149 }
150 "delimiter" => {
151 if let Some(b) = CsvTab::parse_byte(value) {
152 vtab.delimiter = b;
153 } else {
154 return Err(Error::ModuleError(format!(
155 "unrecognized argument to 'delimiter': {value}"
156 )));
157 }
158 }
159 "quote" => {
160 if let Some(b) = CsvTab::parse_byte(value) {
161 if b == b'0' {
162 vtab.quote = 0;
163 } else {
164 vtab.quote = b;
165 }
166 } else {
167 return Err(Error::ModuleError(format!(
168 "unrecognized argument to 'quote': {value}"
169 )));
170 }
171 }
172 _ => {
173 return Err(Error::ModuleError(format!(
174 "unrecognized parameter '{param}'"
175 )));
176 }
177 }
178 }
179
180 if vtab.filename.is_empty() {
181 return Err(Error::ModuleError("no CSV file specified".to_owned()));
182 }
183
184 let mut cols: Vec<String> = Vec::new();
185 if vtab.has_headers || (n_col.is_none() && schema.is_none()) {
186 let mut reader = vtab.reader()?;
187 if vtab.has_headers {
188 {
189 let headers = reader.headers()?;
190 // headers ignored if cols is not empty
191 if n_col.is_none() && schema.is_none() {
192 cols = headers
193 .into_iter()
194 .map(|header| escape_double_quote(header).into_owned())
195 .collect();
196 }
197 }
198 vtab.offset_first_row = reader.position().clone();
199 } else {
200 let mut record = csv::ByteRecord::new();
201 if reader.read_byte_record(&mut record)? {
202 for (i, _) in record.iter().enumerate() {
203 cols.push(format!("c{i}"));
204 }
205 }
206 }
207 } else if let Some(n_col) = n_col {
208 for i in 0..n_col {
209 cols.push(format!("c{i}"));
210 }
211 }
212
213 if cols.is_empty() && schema.is_none() {
214 return Err(Error::ModuleError("no column specified".to_owned()));
215 }
216
217 if schema.is_none() {
218 let mut sql = String::from("CREATE TABLE x(");
219 for (i, col) in cols.iter().enumerate() {
220 sql.push('"');
221 sql.push_str(col);
222 sql.push_str("\" TEXT");
223 if i == cols.len() - 1 {
224 sql.push_str(");");
225 } else {
226 sql.push_str(", ");
227 }
228 }
229 schema = Some(sql);
230 }
231 db.config(VTabConfig::DirectOnly)?;
232 Ok((schema.unwrap(), vtab))
233 }
234
235 // Only a forward full table scan is supported.
best_index(&self, info: &mut IndexInfo) -> Result<()>236 fn best_index(&self, info: &mut IndexInfo) -> Result<()> {
237 info.set_estimated_cost(1_000_000.);
238 Ok(())
239 }
240
open(&mut self) -> Result<CsvTabCursor<'_>>241 fn open(&mut self) -> Result<CsvTabCursor<'_>> {
242 Ok(CsvTabCursor::new(self.reader()?))
243 }
244 }
245
246 impl CreateVTab<'_> for CsvTab {
247 const KIND: VTabKind = VTabKind::Default;
248 }
249
250 /// A cursor for the CSV virtual table
251 #[repr(C)]
252 struct CsvTabCursor<'vtab> {
253 /// Base class. Must be first
254 base: ffi::sqlite3_vtab_cursor,
255 /// The CSV reader object
256 reader: csv::Reader<File>,
257 /// Current cursor position used as rowid
258 row_number: usize,
259 /// Values of the current row
260 cols: csv::StringRecord,
261 eof: bool,
262 phantom: PhantomData<&'vtab CsvTab>,
263 }
264
265 impl CsvTabCursor<'_> {
new<'vtab>(reader: csv::Reader<File>) -> CsvTabCursor<'vtab>266 fn new<'vtab>(reader: csv::Reader<File>) -> CsvTabCursor<'vtab> {
267 CsvTabCursor {
268 base: ffi::sqlite3_vtab_cursor::default(),
269 reader,
270 row_number: 0,
271 cols: csv::StringRecord::new(),
272 eof: false,
273 phantom: PhantomData,
274 }
275 }
276
277 /// Accessor to the associated virtual table.
vtab(&self) -> &CsvTab278 fn vtab(&self) -> &CsvTab {
279 unsafe { &*(self.base.pVtab as *const CsvTab) }
280 }
281 }
282
283 unsafe impl VTabCursor for CsvTabCursor<'_> {
284 // Only a full table scan is supported. So `filter` simply rewinds to
285 // the beginning.
filter( &mut self, _idx_num: c_int, _idx_str: Option<&str>, _args: &Values<'_>, ) -> Result<()>286 fn filter(
287 &mut self,
288 _idx_num: c_int,
289 _idx_str: Option<&str>,
290 _args: &Values<'_>,
291 ) -> Result<()> {
292 {
293 let offset_first_row = self.vtab().offset_first_row.clone();
294 self.reader.seek(offset_first_row)?;
295 }
296 self.row_number = 0;
297 self.next()
298 }
299
next(&mut self) -> Result<()>300 fn next(&mut self) -> Result<()> {
301 {
302 self.eof = self.reader.is_done();
303 if self.eof {
304 return Ok(());
305 }
306
307 self.eof = !self.reader.read_record(&mut self.cols)?;
308 }
309
310 self.row_number += 1;
311 Ok(())
312 }
313
eof(&self) -> bool314 fn eof(&self) -> bool {
315 self.eof
316 }
317
column(&self, ctx: &mut Context, col: c_int) -> Result<()>318 fn column(&self, ctx: &mut Context, col: c_int) -> Result<()> {
319 if col < 0 || col as usize >= self.cols.len() {
320 return Err(Error::ModuleError(format!(
321 "column index out of bounds: {col}"
322 )));
323 }
324 if self.cols.is_empty() {
325 return ctx.set_result(&Null);
326 }
327 // TODO Affinity
328 ctx.set_result(&self.cols[col as usize].to_owned())
329 }
330
rowid(&self) -> Result<i64>331 fn rowid(&self) -> Result<i64> {
332 Ok(self.row_number as i64)
333 }
334 }
335
336 impl From<csv::Error> for Error {
337 #[cold]
from(err: csv::Error) -> Error338 fn from(err: csv::Error) -> Error {
339 Error::ModuleError(err.to_string())
340 }
341 }
342
343 #[cfg(test)]
344 mod test {
345 use crate::vtab::csvtab;
346 use crate::{Connection, Result};
347 use fallible_iterator::FallibleIterator;
348
349 #[test]
test_csv_module() -> Result<()>350 fn test_csv_module() -> Result<()> {
351 let db = Connection::open_in_memory()?;
352 csvtab::load_module(&db)?;
353 db.execute_batch(
354 "CREATE VIRTUAL TABLE vtab USING csv(filename = 'test.csv', header = yes)",
355 )?;
356
357 {
358 let mut s = db.prepare("SELECT rowid, * FROM vtab")?;
359 {
360 let headers = s.column_names();
361 assert_eq!(vec!["rowid", "colA", "colB", "colC"], headers);
362 }
363
364 let ids: Result<Vec<i32>> = s.query([])?.map(|row| row.get::<_, i32>(0)).collect();
365 let sum = ids?.iter().sum::<i32>();
366 assert_eq!(sum, 15);
367 }
368 db.execute_batch("DROP TABLE vtab")
369 }
370
371 #[test]
test_csv_cursor() -> Result<()>372 fn test_csv_cursor() -> Result<()> {
373 let db = Connection::open_in_memory()?;
374 csvtab::load_module(&db)?;
375 db.execute_batch("CREATE VIRTUAL TABLE vtab USING csv(filename='test.csv', header=yes)")?;
376
377 {
378 let mut s = db.prepare(
379 "SELECT v1.rowid, v1.* FROM vtab v1 NATURAL JOIN vtab v2 WHERE \
380 v1.rowid < v2.rowid",
381 )?;
382
383 let mut rows = s.query([])?;
384 let row = rows.next()?.unwrap();
385 assert_eq!(row.get_unwrap::<_, i32>(0), 2);
386 }
387 db.execute_batch("DROP TABLE vtab")
388 }
389 }
390