• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use codespan_reporting::files::Error;
2 #[cfg(feature = "serialization")]
3 use serde::{Deserialize, Serialize};
4 use std::ffi::{OsStr, OsString};
5 use std::num::NonZeroU32;
6 
7 use crate::{ByteIndex, ColumnIndex, LineIndex, LineOffset, Location, RawIndex, Span};
8 
/// A handle that points to a file in the database.
///
/// The wrapped value is the file's position in `Files::files` shifted by
/// `FileId::OFFSET`, so it is never zero.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))]
pub struct FileId(NonZeroU32);

impl FileId {
    /// Offset of our `FileId`'s numeric value to an index on `Files::files`.
    ///
    /// This is to ensure the first `FileId` is non-zero for memory layout optimisations (e.g.
    /// `Option<FileId>` is 4 bytes)
    const OFFSET: u32 = 1;

    /// Build the handle for the file stored at position `index`.
    ///
    /// # Panics
    ///
    /// Panics with "file index cannot be stored" when `index + OFFSET` does not fit in a
    /// `u32`. The conversion and the addition are both checked so that an oversized index
    /// can never be truncated or wrapped into a handle that aliases a different file.
    fn new(index: usize) -> FileId {
        use std::convert::TryFrom;

        let raw = u32::try_from(index)
            .ok()
            .and_then(|index| index.checked_add(Self::OFFSET))
            .and_then(NonZeroU32::new)
            .expect("file index cannot be stored");

        FileId(raw)
    }

    /// Recover the position in `Files::files` that this handle was created from.
    fn get(self) -> usize {
        // The stored value is always at least `OFFSET`, so this cannot underflow.
        (self.0.get() - Self::OFFSET) as usize
    }
}
29 
/// A database of source files.
///
/// The `Source` generic parameter determines how source text is stored. Using [`String`] will have
/// `Files` take ownership of all source text. Smart pointer types such as [`Cow<'_, str>`],
/// [`Rc<str>`] or [`Arc<str>`] can be used to share the source text with the rest of the program.
///
/// [`Cow<'_, str>`]: std::borrow::Cow
/// [`Rc<str>`]: std::rc::Rc
/// [`Arc<str>`]: std::sync::Arc
#[derive(Clone, Debug)]
pub struct Files<Source> {
    /// The stored files, in insertion order. A `FileId` resolves to a
    /// position in this vector via `FileId::get`.
    files: Vec<File<Source>>,
}
43 
44 impl<Source> Default for Files<Source>
45 where
46     Source: AsRef<str>,
47 {
default() -> Self48     fn default() -> Self {
49         Self { files: vec![] }
50     }
51 }
52 
53 impl<Source> Files<Source>
54 where
55     Source: AsRef<str>,
56 {
57     /// Create a new, empty database of files.
new() -> Self58     pub fn new() -> Self {
59         Files::<Source>::default()
60     }
61 
62     /// Add a file to the database, returning the handle that can be used to
63     /// refer to it again.
add(&mut self, name: impl Into<OsString>, source: Source) -> FileId64     pub fn add(&mut self, name: impl Into<OsString>, source: Source) -> FileId {
65         let file_id = FileId::new(self.files.len());
66         self.files.push(File::new(name.into(), source.into()));
67         file_id
68     }
69 
70     /// Update a source file in place.
71     ///
72     /// This will mean that any outstanding byte indexes will now point to
73     /// invalid locations.
update(&mut self, file_id: FileId, source: Source)74     pub fn update(&mut self, file_id: FileId, source: Source) {
75         self.get_mut(file_id).update(source.into())
76     }
77 
78     /// Get a the source file using the file id.
79     // FIXME: return an option or result?
get(&self, file_id: FileId) -> &File<Source>80     fn get(&self, file_id: FileId) -> &File<Source> {
81         &self.files[file_id.get()]
82     }
83 
84     /// Get a the source file using the file id.
85     // FIXME: return an option or result?
get_mut(&mut self, file_id: FileId) -> &mut File<Source>86     fn get_mut(&mut self, file_id: FileId) -> &mut File<Source> {
87         &mut self.files[file_id.get()]
88     }
89 
90     /// Get the name of the source file.
91     ///
92     /// ```rust
93     /// use codespan::Files;
94     ///
95     /// let name = "test";
96     ///
97     /// let mut files = Files::new();
98     /// let file_id = files.add(name, "hello world!");
99     ///
100     /// assert_eq!(files.name(file_id), name);
101     /// ```
name(&self, file_id: FileId) -> &OsStr102     pub fn name(&self, file_id: FileId) -> &OsStr {
103         self.get(file_id).name()
104     }
105 
106     /// Get the span at the given line index.
107     ///
108     /// ```rust
109     /// use codespan::{Files, LineIndex, Span};
110     ///
111     /// let mut files = Files::new();
112     /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
113     ///
114     /// let line_sources = (0..4)
115     ///     .map(|line| files.line_span(file_id, line).unwrap())
116     ///     .collect::<Vec<_>>();
117     ///
118     /// assert_eq!(line_sources,
119     ///     [
120     ///         Span::new(0, 4),    // 0: "foo\n"
121     ///         Span::new(4, 9),    // 1: "bar\r\n"
122     ///         Span::new(9, 10),   // 2: ""
123     ///         Span::new(10, 13),  // 3: "baz"
124     ///     ]
125     /// );
126     /// assert!(files.line_span(file_id, 4).is_err());
127     /// ```
line_span( &self, file_id: FileId, line_index: impl Into<LineIndex>, ) -> Result<Span, Error>128     pub fn line_span(
129         &self,
130         file_id: FileId,
131         line_index: impl Into<LineIndex>,
132     ) -> Result<Span, Error> {
133         self.get(file_id).line_span(line_index.into())
134     }
135 
136     /// Get the line index at the given byte in the source file.
137     ///
138     /// ```rust
139     /// use codespan::{Files, LineIndex};
140     ///
141     /// let mut files = Files::new();
142     /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
143     ///
144     /// assert_eq!(files.line_index(file_id, 0), LineIndex::from(0));
145     /// assert_eq!(files.line_index(file_id, 7), LineIndex::from(1));
146     /// assert_eq!(files.line_index(file_id, 8), LineIndex::from(1));
147     /// assert_eq!(files.line_index(file_id, 9), LineIndex::from(2));
148     /// assert_eq!(files.line_index(file_id, 100), LineIndex::from(3));
149     /// ```
line_index(&self, file_id: FileId, byte_index: impl Into<ByteIndex>) -> LineIndex150     pub fn line_index(&self, file_id: FileId, byte_index: impl Into<ByteIndex>) -> LineIndex {
151         self.get(file_id).line_index(byte_index.into())
152     }
153 
154     /// Get the location at the given byte index in the source file.
155     ///
156     /// ```rust
157     /// use codespan::{ByteIndex, Files, Location, Span};
158     ///
159     /// let mut files = Files::new();
160     /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
161     ///
162     /// assert_eq!(files.location(file_id, 0).unwrap(), Location::new(0, 0));
163     /// assert_eq!(files.location(file_id, 7).unwrap(), Location::new(1, 3));
164     /// assert_eq!(files.location(file_id, 8).unwrap(), Location::new(1, 4));
165     /// assert_eq!(files.location(file_id, 9).unwrap(), Location::new(2, 0));
166     /// assert!(files.location(file_id, 100).is_err());
167     /// ```
location( &self, file_id: FileId, byte_index: impl Into<ByteIndex>, ) -> Result<Location, Error>168     pub fn location(
169         &self,
170         file_id: FileId,
171         byte_index: impl Into<ByteIndex>,
172     ) -> Result<Location, Error> {
173         self.get(file_id).location(byte_index.into())
174     }
175 
176     /// Get the source of the file.
177     ///
178     /// ```rust
179     /// use codespan::Files;
180     ///
181     /// let source = "hello world!";
182     ///
183     /// let mut files = Files::new();
184     /// let file_id = files.add("test", source);
185     ///
186     /// assert_eq!(*files.source(file_id), source);
187     /// ```
source(&self, file_id: FileId) -> &Source188     pub fn source(&self, file_id: FileId) -> &Source {
189         self.get(file_id).source()
190     }
191 
192     /// Return the span of the full source.
193     ///
194     /// ```rust
195     /// use codespan::{Files, Span};
196     ///
197     /// let source = "hello world!";
198     ///
199     /// let mut files = Files::new();
200     /// let file_id = files.add("test", source);
201     ///
202     /// assert_eq!(files.source_span(file_id), Span::from_str(source));
203     /// ```
source_span(&self, file_id: FileId) -> Span204     pub fn source_span(&self, file_id: FileId) -> Span {
205         self.get(file_id).source_span()
206     }
207 
208     /// Return a slice of the source file, given a span.
209     ///
210     /// ```rust
211     /// use codespan::{Files, Span};
212     ///
213     /// let mut files = Files::new();
214     /// let file_id = files.add("test",  "hello world!");
215     ///
216     /// assert_eq!(files.source_slice(file_id, Span::new(0, 5)).unwrap(), "hello");
217     /// assert!(files.source_slice(file_id, Span::new(0, 100)).is_err());
218     /// ```
source_slice(&self, file_id: FileId, span: impl Into<Span>) -> Result<&str, Error>219     pub fn source_slice(&self, file_id: FileId, span: impl Into<Span>) -> Result<&str, Error> {
220         self.get(file_id).source_slice(span.into())
221     }
222 }
223 
/// Adapter that lets a `Files` database be used as a file source for
/// `codespan_reporting`.
///
/// The trait speaks in `usize` offsets while this crate stores `u32`-based
/// indices, so every incoming index is converted with an explicit range check
/// rather than a truncating cast.
#[cfg(feature = "reporting")]
impl<'a, Source> codespan_reporting::files::Files<'a> for Files<Source>
where
    Source: AsRef<str>,
{
    type FileId = FileId;
    type Name = String;
    type Source = &'a str;

    /// The file name rendered through `Path::display`.
    fn name(&self, id: FileId) -> Result<String, Error> {
        use std::path::Path;

        // Borrow the stored name as a path; no intermediate `PathBuf` is needed.
        Ok(Path::new(self.name(id)).display().to_string())
    }

    /// The full source text of the file.
    fn source(&'a self, id: FileId) -> Result<&str, Error> {
        Ok(self.source(id).as_ref())
    }

    /// The index of the line containing `byte_index`.
    fn line_index(&self, id: FileId, byte_index: usize) -> Result<usize, Error> {
        use std::convert::TryFrom;

        // Saturate instead of wrapping: an offset that does not fit in `u32` must keep
        // behaving like a large offset, not like a small one.
        let byte_index = u32::try_from(byte_index).unwrap_or(u32::MAX);

        Ok(self.line_index(id, byte_index).to_usize())
    }

    /// The byte range covered by the line at `line_index`.
    ///
    /// Returns `Error::LineTooLarge` when the line does not exist, including
    /// when `line_index` does not fit in a `u32`.
    fn line_range(
        &'a self,
        id: FileId,
        line_index: usize,
    ) -> Result<std::ops::Range<usize>, Error> {
        use std::convert::TryFrom;

        // A truncating cast here could wrap a huge index around to an existing line.
        let line_index = u32::try_from(line_index).map_err(|_| Error::LineTooLarge {
            given: line_index,
            max: self.get(id).last_line_index().to_usize(),
        })?;
        let span = self.line_span(id, line_index)?;

        Ok(span.start().to_usize()..span.end().to_usize())
    }
}
257 
/// A file that is stored in the database.
///
/// Besides the name and source text, the byte offset at which every line
/// begins is kept alongside so that line lookups do not need to rescan the
/// source.
#[derive(Debug, Clone)]
// `Serialize` is only implemented on `OsString` for windows/unix
#[cfg_attr(
    all(feature = "serialization", any(windows, unix)),
    derive(Deserialize, Serialize)
)]
struct File<Source> {
    /// The name of the file.
    name: OsString,
    /// The source code of the file.
    source: Source,
    /// The starting byte indices in the source code.
    ///
    /// Recomputed from `source` whenever the file is created or updated.
    line_starts: Vec<ByteIndex>,
}
273 
274 impl<Source> File<Source>
275 where
276     Source: AsRef<str>,
277 {
new(name: OsString, source: Source) -> Self278     fn new(name: OsString, source: Source) -> Self {
279         let line_starts = line_starts(source.as_ref())
280             .map(|i| ByteIndex::from(i as u32))
281             .collect();
282 
283         File {
284             name,
285             source,
286             line_starts,
287         }
288     }
289 
update(&mut self, source: Source)290     fn update(&mut self, source: Source) {
291         let line_starts = line_starts(source.as_ref())
292             .map(|i| ByteIndex::from(i as u32))
293             .collect();
294         self.source = source;
295         self.line_starts = line_starts;
296     }
297 
name(&self) -> &OsStr298     fn name(&self) -> &OsStr {
299         &self.name
300     }
301 
line_start(&self, line_index: LineIndex) -> Result<ByteIndex, Error>302     fn line_start(&self, line_index: LineIndex) -> Result<ByteIndex, Error> {
303         use std::cmp::Ordering;
304 
305         match line_index.cmp(&self.last_line_index()) {
306             Ordering::Less => Ok(self.line_starts[line_index.to_usize()]),
307             Ordering::Equal => Ok(self.source_span().end()),
308             Ordering::Greater => Err(Error::LineTooLarge {
309                 given: line_index.to_usize(),
310                 max: self.last_line_index().to_usize(),
311             }),
312         }
313     }
314 
last_line_index(&self) -> LineIndex315     fn last_line_index(&self) -> LineIndex {
316         LineIndex::from(self.line_starts.len() as RawIndex)
317     }
318 
line_span(&self, line_index: LineIndex) -> Result<Span, Error>319     fn line_span(&self, line_index: LineIndex) -> Result<Span, Error> {
320         let line_start = self.line_start(line_index)?;
321         let next_line_start = self.line_start(line_index + LineOffset::from(1))?;
322 
323         Ok(Span::new(line_start, next_line_start))
324     }
325 
line_index(&self, byte_index: ByteIndex) -> LineIndex326     fn line_index(&self, byte_index: ByteIndex) -> LineIndex {
327         match self.line_starts.binary_search(&byte_index) {
328             // Found the start of a line
329             Ok(line) => LineIndex::from(line as u32),
330             Err(next_line) => LineIndex::from(next_line as u32 - 1),
331         }
332     }
333 
location(&self, byte_index: ByteIndex) -> Result<Location, Error>334     fn location(&self, byte_index: ByteIndex) -> Result<Location, Error> {
335         let line_index = self.line_index(byte_index);
336         let line_start_index = self
337             .line_start(line_index)
338             .map_err(|_| Error::IndexTooLarge {
339                 given: byte_index.to_usize(),
340                 max: self.source().as_ref().len() - 1,
341             })?;
342         let line_src = self
343             .source
344             .as_ref()
345             .get(line_start_index.to_usize()..byte_index.to_usize())
346             .ok_or_else(|| {
347                 let given = byte_index.to_usize();
348                 let max = self.source().as_ref().len() - 1;
349                 if given > max {
350                     Error::IndexTooLarge { given, max }
351                 } else {
352                     Error::InvalidCharBoundary { given }
353                 }
354             })?;
355 
356         Ok(Location {
357             line: line_index,
358             column: ColumnIndex::from(line_src.chars().count() as u32),
359         })
360     }
361 
source(&self) -> &Source362     fn source(&self) -> &Source {
363         &self.source
364     }
365 
source_span(&self) -> Span366     fn source_span(&self) -> Span {
367         Span::from_str(self.source.as_ref())
368     }
369 
source_slice(&self, span: Span) -> Result<&str, Error>370     fn source_slice(&self, span: Span) -> Result<&str, Error> {
371         let start = span.start().to_usize();
372         let end = span.end().to_usize();
373 
374         self.source.as_ref().get(start..end).ok_or_else(|| {
375             let max = self.source().as_ref().len() - 1;
376             Error::IndexTooLarge {
377                 given: if start > max { start } else { end },
378                 max,
379             }
380         })
381     }
382 }
383 
// NOTE: this is copied from `codespan_reporting::files::line_starts` and should be kept in sync.
/// Yield the byte offset at which every line of `source` begins.
///
/// The first offset is always `0`; each further offset is the position just
/// after a `\n` byte.
fn line_starts<'source>(source: &'source str) -> impl 'source + Iterator<Item = usize> {
    // `\n` is a single byte that never occurs inside a multi-byte UTF-8
    // sequence, so scanning bytes finds exactly the newline characters.
    let after_newlines = source
        .bytes()
        .enumerate()
        .filter(|&(_, byte)| byte == b'\n')
        .map(|(offset, _)| offset + 1);

    std::iter::once(0).chain(after_newlines)
}
388 
#[cfg(test)]
mod test {
    use super::*;

    /// Four lines with mixed endings: `\n`, `\r\n`, an empty line, and no trailing newline.
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// The line-start index built on `add` holds one entry per line.
    #[test]
    fn line_starts() {
        let mut db = Files::<String>::new();
        let id = db.add("test", String::from(TEST_SOURCE));

        // Starts of: "foo\n", "bar\r\n", "", "baz"
        let expected: Vec<ByteIndex> = [0u32, 4, 9, 10]
            .iter()
            .map(|&start| ByteIndex::from(start))
            .collect();

        assert_eq!(db.get(id).line_starts, expected);
    }

    /// Slicing the source by each line span yields the lines, terminators included.
    #[test]
    fn line_span_sources() {
        // Also make sure we can use `Arc` for source
        use std::sync::Arc;

        let mut db = Files::<Arc<str>>::new();
        let id = db.add("test", TEST_SOURCE.into());

        let lines: Vec<&str> = (0..4)
            .map(|line| {
                db.line_span(id, line)
                    .and_then(|span| db.source_slice(id, span))
                    .unwrap()
            })
            .collect();

        assert_eq!(lines, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}
429