1 use codespan_reporting::files::Error;
2 #[cfg(feature = "serialization")]
3 use serde::{Deserialize, Serialize};
4 use std::ffi::{OsStr, OsString};
5 use std::num::NonZeroU32;
6
7 use crate::{ByteIndex, ColumnIndex, LineIndex, LineOffset, Location, RawIndex, Span};
8
/// A handle that points to a file in the database.
///
/// The handle wraps a non-zero integer, so `Option<FileId>` is no larger than `FileId` itself.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))]
pub struct FileId(NonZeroU32);

impl FileId {
    /// Offset of our `FileId`'s numeric value to an index on `Files::files`.
    ///
    /// This is to ensure the first `FileId` is non-zero for memory layout optimisations (e.g.
    /// `Option<FileId>` is 4 bytes)
    const OFFSET: u32 = 1;

    /// Builds the handle for the file stored at `index` in `Files::files`.
    ///
    /// # Panics
    ///
    /// Panics if `index + OFFSET` does not fit in a `u32`. The conversion is checked, so an
    /// oversized index can never wrap around and alias the handle of another file.
    fn new(index: usize) -> FileId {
        use std::convert::TryFrom;

        let raw = u32::try_from(index)
            .ok()
            .and_then(|index| index.checked_add(Self::OFFSET))
            .and_then(NonZeroU32::new)
            .expect("file index cannot be stored");

        FileId(raw)
    }

    /// Recovers the index into `Files::files` that this handle was created from.
    fn get(self) -> usize {
        (self.0.get() - Self::OFFSET) as usize
    }
}
29
30 /// A database of source files.
31 ///
32 /// The `Source` generic parameter determines how source text is stored. Using [`String`] will have
33 /// `Files` take ownership of all source text. Smart pointer types such as [`Cow<'_, str>`],
34 /// [`Rc<str>`] or [`Arc<str>`] can be used to share the source text with the rest of the program.
35 ///
36 /// [`Cow<'_, str>`]: std::borrow::Cow
37 /// [`Rc<str>`]: std::rc::Rc
38 /// [`Arc<str>`]: std::sync::Arc
39 #[derive(Clone, Debug)]
40 pub struct Files<Source> {
41 files: Vec<File<Source>>,
42 }
43
44 impl<Source> Default for Files<Source>
45 where
46 Source: AsRef<str>,
47 {
default() -> Self48 fn default() -> Self {
49 Self { files: vec![] }
50 }
51 }
52
53 impl<Source> Files<Source>
54 where
55 Source: AsRef<str>,
56 {
57 /// Create a new, empty database of files.
new() -> Self58 pub fn new() -> Self {
59 Files::<Source>::default()
60 }
61
62 /// Add a file to the database, returning the handle that can be used to
63 /// refer to it again.
add(&mut self, name: impl Into<OsString>, source: Source) -> FileId64 pub fn add(&mut self, name: impl Into<OsString>, source: Source) -> FileId {
65 let file_id = FileId::new(self.files.len());
66 self.files.push(File::new(name.into(), source.into()));
67 file_id
68 }
69
70 /// Update a source file in place.
71 ///
72 /// This will mean that any outstanding byte indexes will now point to
73 /// invalid locations.
update(&mut self, file_id: FileId, source: Source)74 pub fn update(&mut self, file_id: FileId, source: Source) {
75 self.get_mut(file_id).update(source.into())
76 }
77
78 /// Get a the source file using the file id.
79 // FIXME: return an option or result?
get(&self, file_id: FileId) -> &File<Source>80 fn get(&self, file_id: FileId) -> &File<Source> {
81 &self.files[file_id.get()]
82 }
83
84 /// Get a the source file using the file id.
85 // FIXME: return an option or result?
get_mut(&mut self, file_id: FileId) -> &mut File<Source>86 fn get_mut(&mut self, file_id: FileId) -> &mut File<Source> {
87 &mut self.files[file_id.get()]
88 }
89
90 /// Get the name of the source file.
91 ///
92 /// ```rust
93 /// use codespan::Files;
94 ///
95 /// let name = "test";
96 ///
97 /// let mut files = Files::new();
98 /// let file_id = files.add(name, "hello world!");
99 ///
100 /// assert_eq!(files.name(file_id), name);
101 /// ```
name(&self, file_id: FileId) -> &OsStr102 pub fn name(&self, file_id: FileId) -> &OsStr {
103 self.get(file_id).name()
104 }
105
106 /// Get the span at the given line index.
107 ///
108 /// ```rust
109 /// use codespan::{Files, LineIndex, Span};
110 ///
111 /// let mut files = Files::new();
112 /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
113 ///
114 /// let line_sources = (0..4)
115 /// .map(|line| files.line_span(file_id, line).unwrap())
116 /// .collect::<Vec<_>>();
117 ///
118 /// assert_eq!(line_sources,
119 /// [
120 /// Span::new(0, 4), // 0: "foo\n"
121 /// Span::new(4, 9), // 1: "bar\r\n"
122 /// Span::new(9, 10), // 2: ""
123 /// Span::new(10, 13), // 3: "baz"
124 /// ]
125 /// );
126 /// assert!(files.line_span(file_id, 4).is_err());
127 /// ```
line_span( &self, file_id: FileId, line_index: impl Into<LineIndex>, ) -> Result<Span, Error>128 pub fn line_span(
129 &self,
130 file_id: FileId,
131 line_index: impl Into<LineIndex>,
132 ) -> Result<Span, Error> {
133 self.get(file_id).line_span(line_index.into())
134 }
135
136 /// Get the line index at the given byte in the source file.
137 ///
138 /// ```rust
139 /// use codespan::{Files, LineIndex};
140 ///
141 /// let mut files = Files::new();
142 /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
143 ///
144 /// assert_eq!(files.line_index(file_id, 0), LineIndex::from(0));
145 /// assert_eq!(files.line_index(file_id, 7), LineIndex::from(1));
146 /// assert_eq!(files.line_index(file_id, 8), LineIndex::from(1));
147 /// assert_eq!(files.line_index(file_id, 9), LineIndex::from(2));
148 /// assert_eq!(files.line_index(file_id, 100), LineIndex::from(3));
149 /// ```
line_index(&self, file_id: FileId, byte_index: impl Into<ByteIndex>) -> LineIndex150 pub fn line_index(&self, file_id: FileId, byte_index: impl Into<ByteIndex>) -> LineIndex {
151 self.get(file_id).line_index(byte_index.into())
152 }
153
154 /// Get the location at the given byte index in the source file.
155 ///
156 /// ```rust
157 /// use codespan::{ByteIndex, Files, Location, Span};
158 ///
159 /// let mut files = Files::new();
160 /// let file_id = files.add("test", "foo\nbar\r\n\nbaz");
161 ///
162 /// assert_eq!(files.location(file_id, 0).unwrap(), Location::new(0, 0));
163 /// assert_eq!(files.location(file_id, 7).unwrap(), Location::new(1, 3));
164 /// assert_eq!(files.location(file_id, 8).unwrap(), Location::new(1, 4));
165 /// assert_eq!(files.location(file_id, 9).unwrap(), Location::new(2, 0));
166 /// assert!(files.location(file_id, 100).is_err());
167 /// ```
location( &self, file_id: FileId, byte_index: impl Into<ByteIndex>, ) -> Result<Location, Error>168 pub fn location(
169 &self,
170 file_id: FileId,
171 byte_index: impl Into<ByteIndex>,
172 ) -> Result<Location, Error> {
173 self.get(file_id).location(byte_index.into())
174 }
175
176 /// Get the source of the file.
177 ///
178 /// ```rust
179 /// use codespan::Files;
180 ///
181 /// let source = "hello world!";
182 ///
183 /// let mut files = Files::new();
184 /// let file_id = files.add("test", source);
185 ///
186 /// assert_eq!(*files.source(file_id), source);
187 /// ```
source(&self, file_id: FileId) -> &Source188 pub fn source(&self, file_id: FileId) -> &Source {
189 self.get(file_id).source()
190 }
191
192 /// Return the span of the full source.
193 ///
194 /// ```rust
195 /// use codespan::{Files, Span};
196 ///
197 /// let source = "hello world!";
198 ///
199 /// let mut files = Files::new();
200 /// let file_id = files.add("test", source);
201 ///
202 /// assert_eq!(files.source_span(file_id), Span::from_str(source));
203 /// ```
source_span(&self, file_id: FileId) -> Span204 pub fn source_span(&self, file_id: FileId) -> Span {
205 self.get(file_id).source_span()
206 }
207
208 /// Return a slice of the source file, given a span.
209 ///
210 /// ```rust
211 /// use codespan::{Files, Span};
212 ///
213 /// let mut files = Files::new();
214 /// let file_id = files.add("test", "hello world!");
215 ///
216 /// assert_eq!(files.source_slice(file_id, Span::new(0, 5)).unwrap(), "hello");
217 /// assert!(files.source_slice(file_id, Span::new(0, 100)).is_err());
218 /// ```
source_slice(&self, file_id: FileId, span: impl Into<Span>) -> Result<&str, Error>219 pub fn source_slice(&self, file_id: FileId, span: impl Into<Span>) -> Result<&str, Error> {
220 self.get(file_id).source_slice(span.into())
221 }
222 }
223
/// Lets a [`Files`] database be used directly as the file source for `codespan_reporting`.
///
/// `codespan_reporting` works with `usize` indices while this crate stores `u32`-backed indices,
/// so incoming indices are converted with explicit range checks rather than truncating casts.
#[cfg(feature = "reporting")]
impl<'a, Source> codespan_reporting::files::Files<'a> for Files<Source>
where
    Source: AsRef<str>,
{
    type FileId = FileId;
    type Name = String;
    type Source = &'a str;

    /// Returns the file name rendered for display (lossily, if it is not valid Unicode).
    fn name(&self, id: FileId) -> Result<String, Error> {
        use std::path::Path;

        // Borrow the name as a `Path` instead of allocating a `PathBuf` just to display it.
        Ok(Path::new(self.name(id)).display().to_string())
    }

    /// Returns the full source text of the file.
    fn source(&'a self, id: FileId) -> Result<&str, Error> {
        Ok(self.source(id).as_ref())
    }

    /// Returns the index of the line that contains `byte_index`.
    fn line_index(&self, id: FileId, byte_index: usize) -> Result<usize, Error> {
        use std::convert::TryFrom;

        // A byte index past the end of the source already resolves to the last line, so an
        // index too large for `u32` is clamped instead of being wrapped to an unrelated offset.
        let byte_index = u32::try_from(byte_index).unwrap_or(u32::MAX);

        Ok(self.line_index(id, byte_index).to_usize())
    }

    /// Returns the byte range covered by the line at `line_index`.
    ///
    /// Fails with `Error::LineTooLarge` when the file has no such line, including when
    /// `line_index` does not fit in the `u32` used for line indices.
    fn line_range(
        &'a self,
        id: FileId,
        line_index: usize,
    ) -> Result<std::ops::Range<usize>, Error> {
        use std::convert::TryFrom;

        let line = u32::try_from(line_index).map_err(|_| Error::LineTooLarge {
            given: line_index,
            max: self.get(id).last_line_index().to_usize(),
        })?;
        let span = self.line_span(id, line)?;

        Ok(span.start().to_usize()..span.end().to_usize())
    }
}
257
258 /// A file that is stored in the database.
259 #[derive(Debug, Clone)]
260 // `Serialize` is only implemented on `OsString` for windows/unix
261 #[cfg_attr(
262 all(feature = "serialization", any(windows, unix)),
263 derive(Deserialize, Serialize)
264 )]
265 struct File<Source> {
266 /// The name of the file.
267 name: OsString,
268 /// The source code of the file.
269 source: Source,
270 /// The starting byte indices in the source code.
271 line_starts: Vec<ByteIndex>,
272 }
273
274 impl<Source> File<Source>
275 where
276 Source: AsRef<str>,
277 {
new(name: OsString, source: Source) -> Self278 fn new(name: OsString, source: Source) -> Self {
279 let line_starts = line_starts(source.as_ref())
280 .map(|i| ByteIndex::from(i as u32))
281 .collect();
282
283 File {
284 name,
285 source,
286 line_starts,
287 }
288 }
289
update(&mut self, source: Source)290 fn update(&mut self, source: Source) {
291 let line_starts = line_starts(source.as_ref())
292 .map(|i| ByteIndex::from(i as u32))
293 .collect();
294 self.source = source;
295 self.line_starts = line_starts;
296 }
297
name(&self) -> &OsStr298 fn name(&self) -> &OsStr {
299 &self.name
300 }
301
line_start(&self, line_index: LineIndex) -> Result<ByteIndex, Error>302 fn line_start(&self, line_index: LineIndex) -> Result<ByteIndex, Error> {
303 use std::cmp::Ordering;
304
305 match line_index.cmp(&self.last_line_index()) {
306 Ordering::Less => Ok(self.line_starts[line_index.to_usize()]),
307 Ordering::Equal => Ok(self.source_span().end()),
308 Ordering::Greater => Err(Error::LineTooLarge {
309 given: line_index.to_usize(),
310 max: self.last_line_index().to_usize(),
311 }),
312 }
313 }
314
last_line_index(&self) -> LineIndex315 fn last_line_index(&self) -> LineIndex {
316 LineIndex::from(self.line_starts.len() as RawIndex)
317 }
318
line_span(&self, line_index: LineIndex) -> Result<Span, Error>319 fn line_span(&self, line_index: LineIndex) -> Result<Span, Error> {
320 let line_start = self.line_start(line_index)?;
321 let next_line_start = self.line_start(line_index + LineOffset::from(1))?;
322
323 Ok(Span::new(line_start, next_line_start))
324 }
325
line_index(&self, byte_index: ByteIndex) -> LineIndex326 fn line_index(&self, byte_index: ByteIndex) -> LineIndex {
327 match self.line_starts.binary_search(&byte_index) {
328 // Found the start of a line
329 Ok(line) => LineIndex::from(line as u32),
330 Err(next_line) => LineIndex::from(next_line as u32 - 1),
331 }
332 }
333
location(&self, byte_index: ByteIndex) -> Result<Location, Error>334 fn location(&self, byte_index: ByteIndex) -> Result<Location, Error> {
335 let line_index = self.line_index(byte_index);
336 let line_start_index = self
337 .line_start(line_index)
338 .map_err(|_| Error::IndexTooLarge {
339 given: byte_index.to_usize(),
340 max: self.source().as_ref().len() - 1,
341 })?;
342 let line_src = self
343 .source
344 .as_ref()
345 .get(line_start_index.to_usize()..byte_index.to_usize())
346 .ok_or_else(|| {
347 let given = byte_index.to_usize();
348 let max = self.source().as_ref().len() - 1;
349 if given > max {
350 Error::IndexTooLarge { given, max }
351 } else {
352 Error::InvalidCharBoundary { given }
353 }
354 })?;
355
356 Ok(Location {
357 line: line_index,
358 column: ColumnIndex::from(line_src.chars().count() as u32),
359 })
360 }
361
source(&self) -> &Source362 fn source(&self) -> &Source {
363 &self.source
364 }
365
source_span(&self) -> Span366 fn source_span(&self) -> Span {
367 Span::from_str(self.source.as_ref())
368 }
369
source_slice(&self, span: Span) -> Result<&str, Error>370 fn source_slice(&self, span: Span) -> Result<&str, Error> {
371 let start = span.start().to_usize();
372 let end = span.end().to_usize();
373
374 self.source.as_ref().get(start..end).ok_or_else(|| {
375 let max = self.source().as_ref().len() - 1;
376 Error::IndexTooLarge {
377 given: if start > max { start } else { end },
378 max,
379 }
380 })
381 }
382 }
383
/// Yields the byte offset at which each line of `source` begins.
///
/// The first offset is always `0`; after that, one offset is produced for every `\n`, pointing at
/// the byte that follows it.
// NOTE: this mirrors `codespan_reporting::files::line_starts`; keep the two in sync.
fn line_starts<'source>(source: &'source str) -> impl 'source + Iterator<Item = usize> {
    // `\n` is a single byte that never occurs inside a multi-byte UTF-8 sequence, so scanning
    // the raw bytes finds exactly the same positions as searching for the character.
    let after_newlines = source
        .bytes()
        .enumerate()
        .filter(|&(_, byte)| byte == b'\n')
        .map(|(offset, _)| offset + 1);

    std::iter::once(0).chain(after_newlines)
}
388
#[cfg(test)]
mod test {
    use super::*;

    /// Four lines: "foo\n", "bar\r\n", "\n" and an unterminated "baz".
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// The stored line index holds the starting byte of each of the four lines.
    #[test]
    fn line_starts() {
        let mut files = Files::<String>::new();
        let file_id = files.add("test", TEST_SOURCE.to_owned());

        let expected: Vec<ByteIndex> = [0u32, 4, 9, 10]
            .iter()
            .copied()
            .map(ByteIndex::from)
            .collect();

        assert_eq!(files.get(file_id).line_starts, expected);
    }

    /// Slicing the source with each line's span gives back the line, terminator included.
    #[test]
    fn line_span_sources() {
        // Also make sure we can use `Arc` for source
        use std::sync::Arc;

        let mut files = Files::<Arc<str>>::new();
        let file_id = files.add("test", Arc::from(TEST_SOURCE));

        let mut line_sources = Vec::new();
        for line in 0u32..4 {
            let line_span = files.line_span(file_id, line).unwrap();
            line_sources.push(files.source_slice(file_id, line_span).unwrap());
        }

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}
429