• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Source positions and related helper functions.
2 //!
3 //! Important concepts in this module include:
4 //!
5 //! - the *span*, represented by [`SpanData`] and related types;
6 //! - source code as represented by a [`SourceMap`]; and
7 //! - interned strings, represented by [`Symbol`]s, with some common symbols available statically in the [`sym`] module.
8 //!
9 //! Unlike most compilers, the span contains not only the position in the source code, but also various other metadata,
10 //! such as the edition and macro hygiene. This metadata is stored in [`SyntaxContext`] and [`ExpnData`].
11 //!
12 //! ## Note
13 //!
14 //! This API is completely unstable and subject to change.
15 
16 #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
17 #![feature(array_windows)]
18 #![feature(if_let_guard)]
19 #![feature(negative_impls)]
20 #![feature(min_specialization)]
21 #![feature(rustc_attrs)]
22 #![feature(let_chains)]
23 #![feature(round_char_boundary)]
24 #![deny(rustc::untranslatable_diagnostic)]
25 #![deny(rustc::diagnostic_outside_of_impl)]
26 
27 #[macro_use]
28 extern crate rustc_macros;
29 
30 #[macro_use]
31 extern crate tracing;
32 
33 use rustc_data_structures::AtomicRef;
34 use rustc_macros::HashStable_Generic;
35 use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
36 
37 mod caching_source_map_view;
38 pub mod source_map;
39 pub use self::caching_source_map_view::CachingSourceMapView;
40 use source_map::SourceMap;
41 
42 pub mod edition;
43 use edition::Edition;
44 pub mod hygiene;
45 use hygiene::Transparency;
46 pub use hygiene::{DesugaringKind, ExpnKind, MacroKind};
47 pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext};
48 use rustc_data_structures::stable_hasher::HashingControls;
49 pub mod def_id;
50 use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE};
51 pub mod edit_distance;
52 mod span_encoding;
53 pub use span_encoding::{Span, DUMMY_SP};
54 
55 pub mod symbol;
56 pub use symbol::{sym, Symbol};
57 
58 mod analyze_source_file;
59 pub mod fatal_error;
60 
61 pub mod profiling;
62 
63 use rustc_data_structures::stable_hasher::{Hash128, Hash64, HashStable, StableHasher};
64 use rustc_data_structures::sync::{Lock, Lrc};
65 
66 use std::borrow::Cow;
67 use std::cmp::{self, Ordering};
68 use std::hash::Hash;
69 use std::ops::{Add, Range, Sub};
70 use std::path::{Path, PathBuf};
71 use std::str::FromStr;
72 use std::{fmt, iter};
73 
74 use md5::Digest;
75 use md5::Md5;
76 use sha1::Sha1;
77 use sha2::Sha256;
78 
79 #[cfg(test)]
80 mod tests;
81 
82 /// Per-session global variables: this struct is stored in thread-local storage
83 /// in such a way that it is accessible without any kind of handle to all
84 /// threads within the compilation session, but is not accessible outside the
85 /// session.
86 pub struct SessionGlobals {
87     symbol_interner: symbol::Interner,
88     span_interner: Lock<span_encoding::SpanInterner>,
89     hygiene_data: Lock<hygiene::HygieneData>,
90 
91     /// A reference to the source map in the `Session`. It's an `Option`
92     /// because it can't be initialized until `Session` is created, which
93     /// happens after `SessionGlobals`. `set_source_map` does the
94     /// initialization.
95     ///
96     /// This field should only be used in places where the `Session` is truly
97     /// not available, such as `<Span as Debug>::fmt`.
98     source_map: Lock<Option<Lrc<SourceMap>>>,
99 }
100 
101 impl SessionGlobals {
new(edition: Edition) -> SessionGlobals102     pub fn new(edition: Edition) -> SessionGlobals {
103         SessionGlobals {
104             symbol_interner: symbol::Interner::fresh(),
105             span_interner: Lock::new(span_encoding::SpanInterner::default()),
106             hygiene_data: Lock::new(hygiene::HygieneData::new(edition)),
107             source_map: Lock::new(None),
108         }
109     }
110 }
111 
112 #[inline]
create_session_globals_then<R>(edition: Edition, f: impl FnOnce() -> R) -> R113 pub fn create_session_globals_then<R>(edition: Edition, f: impl FnOnce() -> R) -> R {
114     assert!(
115         !SESSION_GLOBALS.is_set(),
116         "SESSION_GLOBALS should never be overwritten! \
117          Use another thread if you need another SessionGlobals"
118     );
119     let session_globals = SessionGlobals::new(edition);
120     SESSION_GLOBALS.set(&session_globals, f)
121 }
122 
123 #[inline]
set_session_globals_then<R>(session_globals: &SessionGlobals, f: impl FnOnce() -> R) -> R124 pub fn set_session_globals_then<R>(session_globals: &SessionGlobals, f: impl FnOnce() -> R) -> R {
125     assert!(
126         !SESSION_GLOBALS.is_set(),
127         "SESSION_GLOBALS should never be overwritten! \
128          Use another thread if you need another SessionGlobals"
129     );
130     SESSION_GLOBALS.set(session_globals, f)
131 }
132 
133 #[inline]
create_default_session_if_not_set_then<R, F>(f: F) -> R where F: FnOnce(&SessionGlobals) -> R,134 pub fn create_default_session_if_not_set_then<R, F>(f: F) -> R
135 where
136     F: FnOnce(&SessionGlobals) -> R,
137 {
138     create_session_if_not_set_then(edition::DEFAULT_EDITION, f)
139 }
140 
141 #[inline]
create_session_if_not_set_then<R, F>(edition: Edition, f: F) -> R where F: FnOnce(&SessionGlobals) -> R,142 pub fn create_session_if_not_set_then<R, F>(edition: Edition, f: F) -> R
143 where
144     F: FnOnce(&SessionGlobals) -> R,
145 {
146     if !SESSION_GLOBALS.is_set() {
147         let session_globals = SessionGlobals::new(edition);
148         SESSION_GLOBALS.set(&session_globals, || SESSION_GLOBALS.with(f))
149     } else {
150         SESSION_GLOBALS.with(f)
151     }
152 }
153 
154 #[inline]
with_session_globals<R, F>(f: F) -> R where F: FnOnce(&SessionGlobals) -> R,155 pub fn with_session_globals<R, F>(f: F) -> R
156 where
157     F: FnOnce(&SessionGlobals) -> R,
158 {
159     SESSION_GLOBALS.with(f)
160 }
161 
162 #[inline]
create_default_session_globals_then<R>(f: impl FnOnce() -> R) -> R163 pub fn create_default_session_globals_then<R>(f: impl FnOnce() -> R) -> R {
164     create_session_globals_then(edition::DEFAULT_EDITION, f)
165 }
166 
167 // If this ever becomes non thread-local, `decode_syntax_context`
168 // and `decode_expn_id` will need to be updated to handle concurrent
169 // deserialization.
170 scoped_tls::scoped_thread_local!(static SESSION_GLOBALS: SessionGlobals);
171 
172 // FIXME: We should use this enum or something like it to get rid of the
173 // use of magic `/rust/1.x/...` paths across the board.
174 #[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd)]
175 #[derive(Decodable)]
176 pub enum RealFileName {
177     LocalPath(PathBuf),
178     /// For remapped paths (namely paths into libstd that have been mapped
179     /// to the appropriate spot on the local host's file system, and local file
180     /// system paths that have been remapped with `FilePathMapping`),
181     Remapped {
182         /// `local_path` is the (host-dependent) local path to the file. This is
183         /// None if the file was imported from another crate
184         local_path: Option<PathBuf>,
185         /// `virtual_name` is the stable path rustc will store internally within
186         /// build artifacts.
187         virtual_name: PathBuf,
188     },
189 }
190 
191 impl Hash for RealFileName {
hash<H: std::hash::Hasher>(&self, state: &mut H)192     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
193         // To prevent #70924 from happening again we should only hash the
194         // remapped (virtualized) path if that exists. This is because
195         // virtualized paths to sysroot crates (/rust/$hash or /rust/$version)
196         // remain stable even if the corresponding local_path changes
197         self.remapped_path_if_available().hash(state)
198     }
199 }
200 
201 // This is functionally identical to #[derive(Encodable)], with the exception of
202 // an added assert statement
203 impl<S: Encoder> Encodable<S> for RealFileName {
encode(&self, encoder: &mut S)204     fn encode(&self, encoder: &mut S) {
205         match *self {
206             RealFileName::LocalPath(ref local_path) => encoder.emit_enum_variant(0, |encoder| {
207                 local_path.encode(encoder);
208             }),
209 
210             RealFileName::Remapped { ref local_path, ref virtual_name } => encoder
211                 .emit_enum_variant(1, |encoder| {
212                     // For privacy and build reproducibility, we must not embed host-dependant path in artifacts
213                     // if they have been remapped by --remap-path-prefix
214                     assert!(local_path.is_none());
215                     local_path.encode(encoder);
216                     virtual_name.encode(encoder);
217                 }),
218         }
219     }
220 }
221 
222 impl RealFileName {
223     /// Returns the path suitable for reading from the file system on the local host,
224     /// if this information exists.
225     /// Avoid embedding this in build artifacts; see `remapped_path_if_available()` for that.
local_path(&self) -> Option<&Path>226     pub fn local_path(&self) -> Option<&Path> {
227         match self {
228             RealFileName::LocalPath(p) => Some(p),
229             RealFileName::Remapped { local_path, virtual_name: _ } => local_path.as_deref(),
230         }
231     }
232 
233     /// Returns the path suitable for reading from the file system on the local host,
234     /// if this information exists.
235     /// Avoid embedding this in build artifacts; see `remapped_path_if_available()` for that.
into_local_path(self) -> Option<PathBuf>236     pub fn into_local_path(self) -> Option<PathBuf> {
237         match self {
238             RealFileName::LocalPath(p) => Some(p),
239             RealFileName::Remapped { local_path: p, virtual_name: _ } => p,
240         }
241     }
242 
243     /// Returns the path suitable for embedding into build artifacts. This would still
244     /// be a local path if it has not been remapped. A remapped path will not correspond
245     /// to a valid file system path: see `local_path_if_available()` for something that
246     /// is more likely to return paths into the local host file system.
remapped_path_if_available(&self) -> &Path247     pub fn remapped_path_if_available(&self) -> &Path {
248         match self {
249             RealFileName::LocalPath(p)
250             | RealFileName::Remapped { local_path: _, virtual_name: p } => p,
251         }
252     }
253 
254     /// Returns the path suitable for reading from the file system on the local host,
255     /// if this information exists. Otherwise returns the remapped name.
256     /// Avoid embedding this in build artifacts; see `remapped_path_if_available()` for that.
local_path_if_available(&self) -> &Path257     pub fn local_path_if_available(&self) -> &Path {
258         match self {
259             RealFileName::LocalPath(path)
260             | RealFileName::Remapped { local_path: None, virtual_name: path }
261             | RealFileName::Remapped { local_path: Some(path), virtual_name: _ } => path,
262         }
263     }
264 
to_string_lossy(&self, display_pref: FileNameDisplayPreference) -> Cow<'_, str>265     pub fn to_string_lossy(&self, display_pref: FileNameDisplayPreference) -> Cow<'_, str> {
266         match display_pref {
267             FileNameDisplayPreference::Local => self.local_path_if_available().to_string_lossy(),
268             FileNameDisplayPreference::Remapped => {
269                 self.remapped_path_if_available().to_string_lossy()
270             }
271             FileNameDisplayPreference::Short => self
272                 .local_path_if_available()
273                 .file_name()
274                 .map_or_else(|| "".into(), |f| f.to_string_lossy()),
275         }
276     }
277 }
278 
279 /// Differentiates between real files and common virtual files.
280 #[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Hash)]
281 #[derive(Decodable, Encodable)]
282 pub enum FileName {
283     Real(RealFileName),
284     /// Call to `quote!`.
285     QuoteExpansion(Hash64),
286     /// Command line.
287     Anon(Hash64),
288     /// Hack in `src/librustc_ast/parse.rs`.
289     // FIXME(jseyfried)
290     MacroExpansion(Hash64),
291     ProcMacroSourceCode(Hash64),
292     /// Strings provided as `--cfg [cfgspec]` stored in a `crate_cfg`.
293     CfgSpec(Hash64),
294     /// Strings provided as crate attributes in the CLI.
295     CliCrateAttr(Hash64),
296     /// Custom sources for explicit parser calls from plugins and drivers.
297     Custom(String),
298     DocTest(PathBuf, isize),
299     /// Post-substitution inline assembly from LLVM.
300     InlineAsm(Hash64),
301 }
302 
303 impl From<PathBuf> for FileName {
from(p: PathBuf) -> Self304     fn from(p: PathBuf) -> Self {
305         assert!(!p.to_string_lossy().ends_with('>'));
306         FileName::Real(RealFileName::LocalPath(p))
307     }
308 }
309 
/// Selects which form of a file name should be shown.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileNameDisplayPreference {
    /// Show the path *after* the rewrite rules given via
    /// `--remap-path-prefix` have been applied. Appropriate for paths that
    /// end up embedded in files produced by the compiler.
    Remapped,
    /// Show the path *before* the rewrite rules given via
    /// `--remap-path-prefix` have been applied. Appropriate for user-facing
    /// output (such as diagnostics).
    Local,
    /// Show only the file name, to make the output less verbose. Appropriate
    /// for user-facing output (such as diagnostics).
    Short,
}
322 
323 pub struct FileNameDisplay<'a> {
324     inner: &'a FileName,
325     display_pref: FileNameDisplayPreference,
326 }
327 
328 impl fmt::Display for FileNameDisplay<'_> {
fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result329     fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
330         use FileName::*;
331         match *self.inner {
332             Real(ref name) => {
333                 write!(fmt, "{}", name.to_string_lossy(self.display_pref))
334             }
335             QuoteExpansion(_) => write!(fmt, "<quote expansion>"),
336             MacroExpansion(_) => write!(fmt, "<macro expansion>"),
337             Anon(_) => write!(fmt, "<anon>"),
338             ProcMacroSourceCode(_) => write!(fmt, "<proc-macro source code>"),
339             CfgSpec(_) => write!(fmt, "<cfgspec>"),
340             CliCrateAttr(_) => write!(fmt, "<crate attribute>"),
341             Custom(ref s) => write!(fmt, "<{s}>"),
342             DocTest(ref path, _) => write!(fmt, "{}", path.display()),
343             InlineAsm(_) => write!(fmt, "<inline asm>"),
344         }
345     }
346 }
347 
348 impl<'a> FileNameDisplay<'a> {
to_string_lossy(&self) -> Cow<'a, str>349     pub fn to_string_lossy(&self) -> Cow<'a, str> {
350         match self.inner {
351             FileName::Real(ref inner) => inner.to_string_lossy(self.display_pref),
352             _ => Cow::from(self.to_string()),
353         }
354     }
355 }
356 
357 impl FileName {
is_real(&self) -> bool358     pub fn is_real(&self) -> bool {
359         use FileName::*;
360         match *self {
361             Real(_) => true,
362             Anon(_)
363             | MacroExpansion(_)
364             | ProcMacroSourceCode(_)
365             | CfgSpec(_)
366             | CliCrateAttr(_)
367             | Custom(_)
368             | QuoteExpansion(_)
369             | DocTest(_, _)
370             | InlineAsm(_) => false,
371         }
372     }
373 
prefer_remapped(&self) -> FileNameDisplay<'_>374     pub fn prefer_remapped(&self) -> FileNameDisplay<'_> {
375         FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Remapped }
376     }
377 
378     /// This may include transient local filesystem information.
379     /// Must not be embedded in build outputs.
prefer_local(&self) -> FileNameDisplay<'_>380     pub fn prefer_local(&self) -> FileNameDisplay<'_> {
381         FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Local }
382     }
383 
display(&self, display_pref: FileNameDisplayPreference) -> FileNameDisplay<'_>384     pub fn display(&self, display_pref: FileNameDisplayPreference) -> FileNameDisplay<'_> {
385         FileNameDisplay { inner: self, display_pref }
386     }
387 
macro_expansion_source_code(src: &str) -> FileName388     pub fn macro_expansion_source_code(src: &str) -> FileName {
389         let mut hasher = StableHasher::new();
390         src.hash(&mut hasher);
391         FileName::MacroExpansion(hasher.finish())
392     }
393 
anon_source_code(src: &str) -> FileName394     pub fn anon_source_code(src: &str) -> FileName {
395         let mut hasher = StableHasher::new();
396         src.hash(&mut hasher);
397         FileName::Anon(hasher.finish())
398     }
399 
proc_macro_source_code(src: &str) -> FileName400     pub fn proc_macro_source_code(src: &str) -> FileName {
401         let mut hasher = StableHasher::new();
402         src.hash(&mut hasher);
403         FileName::ProcMacroSourceCode(hasher.finish())
404     }
405 
cfg_spec_source_code(src: &str) -> FileName406     pub fn cfg_spec_source_code(src: &str) -> FileName {
407         let mut hasher = StableHasher::new();
408         src.hash(&mut hasher);
409         FileName::QuoteExpansion(hasher.finish())
410     }
411 
cli_crate_attr_source_code(src: &str) -> FileName412     pub fn cli_crate_attr_source_code(src: &str) -> FileName {
413         let mut hasher = StableHasher::new();
414         src.hash(&mut hasher);
415         FileName::CliCrateAttr(hasher.finish())
416     }
417 
doc_test_source_code(path: PathBuf, line: isize) -> FileName418     pub fn doc_test_source_code(path: PathBuf, line: isize) -> FileName {
419         FileName::DocTest(path, line)
420     }
421 
inline_asm_source_code(src: &str) -> FileName422     pub fn inline_asm_source_code(src: &str) -> FileName {
423         let mut hasher = StableHasher::new();
424         src.hash(&mut hasher);
425         FileName::InlineAsm(hasher.finish())
426     }
427 }
428 
429 /// Represents a span.
430 ///
431 /// Spans represent a region of code, used for error reporting. Positions in spans
432 /// are *absolute* positions from the beginning of the [`SourceMap`], not positions
433 /// relative to [`SourceFile`]s. Methods on the `SourceMap` can be used to relate spans back
434 /// to the original source.
435 ///
436 /// You must be careful if the span crosses more than one file, since you will not be
437 /// able to use many of the functions on spans in source_map and you cannot assume
438 /// that the length of the span is equal to `span.hi - span.lo`; there may be space in the
439 /// [`BytePos`] range between files.
440 ///
441 /// `SpanData` is public because `Span` uses a thread-local interner and can't be
442 /// sent to other threads, but some pieces of performance infra run in a separate thread.
443 /// Using `Span` is generally preferred.
444 #[derive(Clone, Copy, Hash, PartialEq, Eq)]
445 pub struct SpanData {
446     pub lo: BytePos,
447     pub hi: BytePos,
448     /// Information about where the macro came from, if this piece of
449     /// code was created by a macro expansion.
450     pub ctxt: SyntaxContext,
451     pub parent: Option<LocalDefId>,
452 }
453 
454 // Order spans by position in the file.
455 impl Ord for SpanData {
cmp(&self, other: &Self) -> Ordering456     fn cmp(&self, other: &Self) -> Ordering {
457         let SpanData {
458             lo: s_lo,
459             hi: s_hi,
460             ctxt: s_ctxt,
461             // `LocalDefId` does not implement `Ord`.
462             // The other fields are enough to determine in-file order.
463             parent: _,
464         } = self;
465         let SpanData {
466             lo: o_lo,
467             hi: o_hi,
468             ctxt: o_ctxt,
469             // `LocalDefId` does not implement `Ord`.
470             // The other fields are enough to determine in-file order.
471             parent: _,
472         } = other;
473 
474         (s_lo, s_hi, s_ctxt).cmp(&(o_lo, o_hi, o_ctxt))
475     }
476 }
477 
478 impl PartialOrd for SpanData {
partial_cmp(&self, other: &Self) -> Option<Ordering>479     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
480         Some(self.cmp(other))
481     }
482 }
483 
484 impl SpanData {
485     #[inline]
span(&self) -> Span486     pub fn span(&self) -> Span {
487         Span::new(self.lo, self.hi, self.ctxt, self.parent)
488     }
489     #[inline]
with_lo(&self, lo: BytePos) -> Span490     pub fn with_lo(&self, lo: BytePos) -> Span {
491         Span::new(lo, self.hi, self.ctxt, self.parent)
492     }
493     #[inline]
with_hi(&self, hi: BytePos) -> Span494     pub fn with_hi(&self, hi: BytePos) -> Span {
495         Span::new(self.lo, hi, self.ctxt, self.parent)
496     }
497     #[inline]
with_ctxt(&self, ctxt: SyntaxContext) -> Span498     pub fn with_ctxt(&self, ctxt: SyntaxContext) -> Span {
499         Span::new(self.lo, self.hi, ctxt, self.parent)
500     }
501     #[inline]
with_parent(&self, parent: Option<LocalDefId>) -> Span502     pub fn with_parent(&self, parent: Option<LocalDefId>) -> Span {
503         Span::new(self.lo, self.hi, self.ctxt, parent)
504     }
505     /// Returns `true` if this is a dummy span with any hygienic context.
506     #[inline]
is_dummy(self) -> bool507     pub fn is_dummy(self) -> bool {
508         self.lo.0 == 0 && self.hi.0 == 0
509     }
510     #[inline]
is_visible(self, sm: &SourceMap) -> bool511     pub fn is_visible(self, sm: &SourceMap) -> bool {
512         !self.is_dummy() && sm.is_span_accessible(self.span())
513     }
514     /// Returns `true` if `self` fully encloses `other`.
contains(self, other: Self) -> bool515     pub fn contains(self, other: Self) -> bool {
516         self.lo <= other.lo && other.hi <= self.hi
517     }
518 }
519 
520 // The interner is pointed to by a thread local value which is only set on the main thread
521 // with parallelization is disabled. So we don't allow `Span` to transfer between threads
522 // to avoid panics and other errors, even though it would be memory safe to do so.
523 #[cfg(not(parallel_compiler))]
524 impl !Send for Span {}
525 #[cfg(not(parallel_compiler))]
526 impl !Sync for Span {}
527 
528 impl PartialOrd for Span {
partial_cmp(&self, rhs: &Self) -> Option<Ordering>529     fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
530         PartialOrd::partial_cmp(&self.data(), &rhs.data())
531     }
532 }
533 impl Ord for Span {
cmp(&self, rhs: &Self) -> Ordering534     fn cmp(&self, rhs: &Self) -> Ordering {
535         Ord::cmp(&self.data(), &rhs.data())
536     }
537 }
538 
539 impl Span {
540     #[inline]
lo(self) -> BytePos541     pub fn lo(self) -> BytePos {
542         self.data().lo
543     }
544     #[inline]
with_lo(self, lo: BytePos) -> Span545     pub fn with_lo(self, lo: BytePos) -> Span {
546         self.data().with_lo(lo)
547     }
548     #[inline]
hi(self) -> BytePos549     pub fn hi(self) -> BytePos {
550         self.data().hi
551     }
552     #[inline]
with_hi(self, hi: BytePos) -> Span553     pub fn with_hi(self, hi: BytePos) -> Span {
554         self.data().with_hi(hi)
555     }
556     #[inline]
eq_ctxt(self, other: Span) -> bool557     pub fn eq_ctxt(self, other: Span) -> bool {
558         self.data_untracked().ctxt == other.data_untracked().ctxt
559     }
560     #[inline]
with_ctxt(self, ctxt: SyntaxContext) -> Span561     pub fn with_ctxt(self, ctxt: SyntaxContext) -> Span {
562         self.data_untracked().with_ctxt(ctxt)
563     }
564     #[inline]
parent(self) -> Option<LocalDefId>565     pub fn parent(self) -> Option<LocalDefId> {
566         self.data().parent
567     }
568     #[inline]
with_parent(self, ctxt: Option<LocalDefId>) -> Span569     pub fn with_parent(self, ctxt: Option<LocalDefId>) -> Span {
570         self.data().with_parent(ctxt)
571     }
572 
573     /// Returns `true` if this is a dummy span with any hygienic context.
574     #[inline]
is_dummy(self) -> bool575     pub fn is_dummy(self) -> bool {
576         self.data_untracked().is_dummy()
577     }
578 
579     #[inline]
is_visible(self, sm: &SourceMap) -> bool580     pub fn is_visible(self, sm: &SourceMap) -> bool {
581         self.data_untracked().is_visible(sm)
582     }
583 
584     /// Returns `true` if this span comes from any kind of macro, desugaring or inlining.
585     #[inline]
from_expansion(self) -> bool586     pub fn from_expansion(self) -> bool {
587         self.ctxt() != SyntaxContext::root()
588     }
589 
590     /// Returns `true` if `span` originates in a macro's expansion where debuginfo should be
591     /// collapsed.
in_macro_expansion_with_collapse_debuginfo(self) -> bool592     pub fn in_macro_expansion_with_collapse_debuginfo(self) -> bool {
593         let outer_expn = self.ctxt().outer_expn_data();
594         matches!(outer_expn.kind, ExpnKind::Macro(..)) && outer_expn.collapse_debuginfo
595     }
596 
597     /// Returns `true` if `span` originates in a derive-macro's expansion.
in_derive_expansion(self) -> bool598     pub fn in_derive_expansion(self) -> bool {
599         matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _))
600     }
601 
602     /// Gate suggestions that would not be appropriate in a context the user didn't write.
can_be_used_for_suggestions(self) -> bool603     pub fn can_be_used_for_suggestions(self) -> bool {
604         !self.from_expansion()
605         // FIXME: If this span comes from a `derive` macro but it points at code the user wrote,
606         // the callsite span and the span will be pointing at different places. It also means that
607         // we can safely provide suggestions on this span.
608             || (matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _))
609                 && self.parent_callsite().map(|p| (p.lo(), p.hi())) != Some((self.lo(), self.hi())))
610     }
611 
612     #[inline]
with_root_ctxt(lo: BytePos, hi: BytePos) -> Span613     pub fn with_root_ctxt(lo: BytePos, hi: BytePos) -> Span {
614         Span::new(lo, hi, SyntaxContext::root(), None)
615     }
616 
617     /// Returns a new span representing an empty span at the beginning of this span.
618     #[inline]
shrink_to_lo(self) -> Span619     pub fn shrink_to_lo(self) -> Span {
620         let span = self.data_untracked();
621         span.with_hi(span.lo)
622     }
623     /// Returns a new span representing an empty span at the end of this span.
624     #[inline]
shrink_to_hi(self) -> Span625     pub fn shrink_to_hi(self) -> Span {
626         let span = self.data_untracked();
627         span.with_lo(span.hi)
628     }
629 
630     #[inline]
631     /// Returns `true` if `hi == lo`.
is_empty(self) -> bool632     pub fn is_empty(self) -> bool {
633         let span = self.data_untracked();
634         span.hi == span.lo
635     }
636 
637     /// Returns `self` if `self` is not the dummy span, and `other` otherwise.
substitute_dummy(self, other: Span) -> Span638     pub fn substitute_dummy(self, other: Span) -> Span {
639         if self.is_dummy() { other } else { self }
640     }
641 
642     /// Returns `true` if `self` fully encloses `other`.
contains(self, other: Span) -> bool643     pub fn contains(self, other: Span) -> bool {
644         let span = self.data();
645         let other = other.data();
646         span.contains(other)
647     }
648 
649     /// Returns `true` if `self` touches `other`.
overlaps(self, other: Span) -> bool650     pub fn overlaps(self, other: Span) -> bool {
651         let span = self.data();
652         let other = other.data();
653         span.lo < other.hi && other.lo < span.hi
654     }
655 
656     /// Returns `true` if the spans are equal with regards to the source text.
657     ///
658     /// Use this instead of `==` when either span could be generated code,
659     /// and you only care that they point to the same bytes of source text.
source_equal(self, other: Span) -> bool660     pub fn source_equal(self, other: Span) -> bool {
661         let span = self.data();
662         let other = other.data();
663         span.lo == other.lo && span.hi == other.hi
664     }
665 
666     /// Returns `Some(span)`, where the start is trimmed by the end of `other`.
trim_start(self, other: Span) -> Option<Span>667     pub fn trim_start(self, other: Span) -> Option<Span> {
668         let span = self.data();
669         let other = other.data();
670         if span.hi > other.hi { Some(span.with_lo(cmp::max(span.lo, other.hi))) } else { None }
671     }
672 
673     /// Returns the source span -- this is either the supplied span, or the span for
674     /// the macro callsite that expanded to it.
source_callsite(self) -> Span675     pub fn source_callsite(self) -> Span {
676         let expn_data = self.ctxt().outer_expn_data();
677         if !expn_data.is_root() { expn_data.call_site.source_callsite() } else { self }
678     }
679 
680     /// The `Span` for the tokens in the previous macro expansion from which `self` was generated,
681     /// if any.
parent_callsite(self) -> Option<Span>682     pub fn parent_callsite(self) -> Option<Span> {
683         let expn_data = self.ctxt().outer_expn_data();
684         if !expn_data.is_root() { Some(expn_data.call_site) } else { None }
685     }
686 
687     /// Walk down the expansion ancestors to find a span that's contained within `outer`.
find_ancestor_inside(mut self, outer: Span) -> Option<Span>688     pub fn find_ancestor_inside(mut self, outer: Span) -> Option<Span> {
689         while !outer.contains(self) {
690             self = self.parent_callsite()?;
691         }
692         Some(self)
693     }
694 
695     /// Like `find_ancestor_inside`, but specifically for when spans might not
696     /// overlaps. Take care when using this, and prefer `find_ancestor_inside`
697     /// when you know that the spans are nested (modulo macro expansion).
find_ancestor_in_same_ctxt(mut self, other: Span) -> Option<Span>698     pub fn find_ancestor_in_same_ctxt(mut self, other: Span) -> Option<Span> {
699         while !Span::eq_ctxt(self, other) {
700             self = self.parent_callsite()?;
701         }
702         Some(self)
703     }
704 
705     /// Edition of the crate from which this span came.
edition(self) -> edition::Edition706     pub fn edition(self) -> edition::Edition {
707         self.ctxt().edition()
708     }
709 
710     #[inline]
is_rust_2015(self) -> bool711     pub fn is_rust_2015(self) -> bool {
712         self.edition().is_rust_2015()
713     }
714 
715     #[inline]
rust_2018(self) -> bool716     pub fn rust_2018(self) -> bool {
717         self.edition().rust_2018()
718     }
719 
720     #[inline]
rust_2021(self) -> bool721     pub fn rust_2021(self) -> bool {
722         self.edition().rust_2021()
723     }
724 
725     #[inline]
rust_2024(self) -> bool726     pub fn rust_2024(self) -> bool {
727         self.edition().rust_2024()
728     }
729 
730     /// Returns the source callee.
731     ///
732     /// Returns `None` if the supplied span has no expansion trace,
733     /// else returns the `ExpnData` for the macro definition
734     /// corresponding to the source callsite.
source_callee(self) -> Option<ExpnData>735     pub fn source_callee(self) -> Option<ExpnData> {
736         let expn_data = self.ctxt().outer_expn_data();
737 
738         // Create an iterator of call site expansions
739         iter::successors(Some(expn_data), |expn_data| {
740             Some(expn_data.call_site.ctxt().outer_expn_data())
741         })
742         // Find the last expansion which is not root
743         .take_while(|expn_data| !expn_data.is_root())
744         .last()
745     }
746 
747     /// Checks if a span is "internal" to a macro in which `#[unstable]`
748     /// items can be used (that is, a macro marked with
749     /// `#[allow_internal_unstable]`).
allows_unstable(self, feature: Symbol) -> bool750     pub fn allows_unstable(self, feature: Symbol) -> bool {
751         self.ctxt()
752             .outer_expn_data()
753             .allow_internal_unstable
754             .is_some_and(|features| features.iter().any(|&f| f == feature))
755     }
756 
757     /// Checks if this span arises from a compiler desugaring of kind `kind`.
is_desugaring(self, kind: DesugaringKind) -> bool758     pub fn is_desugaring(self, kind: DesugaringKind) -> bool {
759         match self.ctxt().outer_expn_data().kind {
760             ExpnKind::Desugaring(k) => k == kind,
761             _ => false,
762         }
763     }
764 
765     /// Returns the compiler desugaring that created this span, or `None`
766     /// if this span is not from a desugaring.
desugaring_kind(self) -> Option<DesugaringKind>767     pub fn desugaring_kind(self) -> Option<DesugaringKind> {
768         match self.ctxt().outer_expn_data().kind {
769             ExpnKind::Desugaring(k) => Some(k),
770             _ => None,
771         }
772     }
773 
774     /// Checks if a span is "internal" to a macro in which `unsafe`
775     /// can be used without triggering the `unsafe_code` lint.
776     /// (that is, a macro marked with `#[allow_internal_unsafe]`).
allows_unsafe(self) -> bool777     pub fn allows_unsafe(self) -> bool {
778         self.ctxt().outer_expn_data().allow_internal_unsafe
779     }
780 
macro_backtrace(mut self) -> impl Iterator<Item = ExpnData>781     pub fn macro_backtrace(mut self) -> impl Iterator<Item = ExpnData> {
782         let mut prev_span = DUMMY_SP;
783         iter::from_fn(move || {
784             loop {
785                 let expn_data = self.ctxt().outer_expn_data();
786                 if expn_data.is_root() {
787                     return None;
788                 }
789 
790                 let is_recursive = expn_data.call_site.source_equal(prev_span);
791 
792                 prev_span = self;
793                 self = expn_data.call_site;
794 
795                 // Don't print recursive invocations.
796                 if !is_recursive {
797                     return Some(expn_data);
798                 }
799             }
800         })
801     }
802 
803     /// Splits a span into two composite spans around a certain position.
split_at(self, pos: u32) -> (Span, Span)804     pub fn split_at(self, pos: u32) -> (Span, Span) {
805         let len = self.hi().0 - self.lo().0;
806         debug_assert!(pos <= len);
807 
808         let split_pos = BytePos(self.lo().0 + pos);
809         (
810             Span::new(self.lo(), split_pos, self.ctxt(), self.parent()),
811             Span::new(split_pos, self.hi(), self.ctxt(), self.parent()),
812         )
813     }
814 
815     /// Returns a `Span` that would enclose both `self` and `end`.
816     ///
817     /// Note that this can also be used to extend the span "backwards":
818     /// `start.to(end)` and `end.to(start)` return the same `Span`.
819     ///
820     /// ```text
821     ///     ____             ___
822     ///     self lorem ipsum end
823     ///     ^^^^^^^^^^^^^^^^^^^^
824     /// ```
to(self, end: Span) -> Span825     pub fn to(self, end: Span) -> Span {
826         let span_data = self.data();
827         let end_data = end.data();
828         // FIXME(jseyfried): `self.ctxt` should always equal `end.ctxt` here (cf. issue #23480).
829         // Return the macro span on its own to avoid weird diagnostic output. It is preferable to
830         // have an incomplete span than a completely nonsensical one.
831         if span_data.ctxt != end_data.ctxt {
832             if span_data.ctxt.is_root() {
833                 return end;
834             } else if end_data.ctxt.is_root() {
835                 return self;
836             }
837             // Both spans fall within a macro.
838             // FIXME(estebank): check if it is the *same* macro.
839         }
840         Span::new(
841             cmp::min(span_data.lo, end_data.lo),
842             cmp::max(span_data.hi, end_data.hi),
843             if span_data.ctxt.is_root() { end_data.ctxt } else { span_data.ctxt },
844             if span_data.parent == end_data.parent { span_data.parent } else { None },
845         )
846     }
847 
848     /// Returns a `Span` between the end of `self` to the beginning of `end`.
849     ///
850     /// ```text
851     ///     ____             ___
852     ///     self lorem ipsum end
853     ///         ^^^^^^^^^^^^^
854     /// ```
between(self, end: Span) -> Span855     pub fn between(self, end: Span) -> Span {
856         let span = self.data();
857         let end = end.data();
858         Span::new(
859             span.hi,
860             end.lo,
861             if end.ctxt.is_root() { end.ctxt } else { span.ctxt },
862             if span.parent == end.parent { span.parent } else { None },
863         )
864     }
865 
866     /// Returns a `Span` from the beginning of `self` until the beginning of `end`.
867     ///
868     /// ```text
869     ///     ____             ___
870     ///     self lorem ipsum end
871     ///     ^^^^^^^^^^^^^^^^^
872     /// ```
until(self, end: Span) -> Span873     pub fn until(self, end: Span) -> Span {
874         // Most of this function's body is copied from `to`.
875         // We can't just do `self.to(end.shrink_to_lo())`,
876         // because to also does some magic where it uses min/max so
877         // it can handle overlapping spans. Some advanced mis-use of
878         // `until` with different ctxts makes this visible.
879         let span_data = self.data();
880         let end_data = end.data();
881         // FIXME(jseyfried): `self.ctxt` should always equal `end.ctxt` here (cf. issue #23480).
882         // Return the macro span on its own to avoid weird diagnostic output. It is preferable to
883         // have an incomplete span than a completely nonsensical one.
884         if span_data.ctxt != end_data.ctxt {
885             if span_data.ctxt.is_root() {
886                 return end;
887             } else if end_data.ctxt.is_root() {
888                 return self;
889             }
890             // Both spans fall within a macro.
891             // FIXME(estebank): check if it is the *same* macro.
892         }
893         Span::new(
894             span_data.lo,
895             end_data.lo,
896             if end_data.ctxt.is_root() { end_data.ctxt } else { span_data.ctxt },
897             if span_data.parent == end_data.parent { span_data.parent } else { None },
898         )
899     }
900 
from_inner(self, inner: InnerSpan) -> Span901     pub fn from_inner(self, inner: InnerSpan) -> Span {
902         let span = self.data();
903         Span::new(
904             span.lo + BytePos::from_usize(inner.start),
905             span.lo + BytePos::from_usize(inner.end),
906             span.ctxt,
907             span.parent,
908         )
909     }
910 
911     /// Equivalent of `Span::def_site` from the proc macro API,
912     /// except that the location is taken from the `self` span.
with_def_site_ctxt(self, expn_id: ExpnId) -> Span913     pub fn with_def_site_ctxt(self, expn_id: ExpnId) -> Span {
914         self.with_ctxt_from_mark(expn_id, Transparency::Opaque)
915     }
916 
917     /// Equivalent of `Span::call_site` from the proc macro API,
918     /// except that the location is taken from the `self` span.
with_call_site_ctxt(self, expn_id: ExpnId) -> Span919     pub fn with_call_site_ctxt(self, expn_id: ExpnId) -> Span {
920         self.with_ctxt_from_mark(expn_id, Transparency::Transparent)
921     }
922 
923     /// Equivalent of `Span::mixed_site` from the proc macro API,
924     /// except that the location is taken from the `self` span.
with_mixed_site_ctxt(self, expn_id: ExpnId) -> Span925     pub fn with_mixed_site_ctxt(self, expn_id: ExpnId) -> Span {
926         self.with_ctxt_from_mark(expn_id, Transparency::SemiTransparent)
927     }
928 
929     /// Produces a span with the same location as `self` and context produced by a macro with the
930     /// given ID and transparency, assuming that macro was defined directly and not produced by
931     /// some other macro (which is the case for built-in and procedural macros).
with_ctxt_from_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span932     pub fn with_ctxt_from_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
933         self.with_ctxt(SyntaxContext::root().apply_mark(expn_id, transparency))
934     }
935 
936     #[inline]
apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span937     pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
938         let span = self.data();
939         span.with_ctxt(span.ctxt.apply_mark(expn_id, transparency))
940     }
941 
942     #[inline]
remove_mark(&mut self) -> ExpnId943     pub fn remove_mark(&mut self) -> ExpnId {
944         let mut span = self.data();
945         let mark = span.ctxt.remove_mark();
946         *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
947         mark
948     }
949 
950     #[inline]
adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId>951     pub fn adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
952         let mut span = self.data();
953         let mark = span.ctxt.adjust(expn_id);
954         *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
955         mark
956     }
957 
958     #[inline]
normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId>959     pub fn normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
960         let mut span = self.data();
961         let mark = span.ctxt.normalize_to_macros_2_0_and_adjust(expn_id);
962         *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
963         mark
964     }
965 
966     #[inline]
glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>>967     pub fn glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>> {
968         let mut span = self.data();
969         let mark = span.ctxt.glob_adjust(expn_id, glob_span);
970         *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
971         mark
972     }
973 
974     #[inline]
reverse_glob_adjust( &mut self, expn_id: ExpnId, glob_span: Span, ) -> Option<Option<ExpnId>>975     pub fn reverse_glob_adjust(
976         &mut self,
977         expn_id: ExpnId,
978         glob_span: Span,
979     ) -> Option<Option<ExpnId>> {
980         let mut span = self.data();
981         let mark = span.ctxt.reverse_glob_adjust(expn_id, glob_span);
982         *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
983         mark
984     }
985 
986     #[inline]
normalize_to_macros_2_0(self) -> Span987     pub fn normalize_to_macros_2_0(self) -> Span {
988         let span = self.data();
989         span.with_ctxt(span.ctxt.normalize_to_macros_2_0())
990     }
991 
992     #[inline]
normalize_to_macro_rules(self) -> Span993     pub fn normalize_to_macro_rules(self) -> Span {
994         let span = self.data();
995         span.with_ctxt(span.ctxt.normalize_to_macro_rules())
996     }
997 }
998 
999 impl Default for Span {
default() -> Self1000     fn default() -> Self {
1001         DUMMY_SP
1002     }
1003 }
1004 
1005 impl<E: Encoder> Encodable<E> for Span {
encode(&self, s: &mut E)1006     default fn encode(&self, s: &mut E) {
1007         let span = self.data();
1008         span.lo.encode(s);
1009         span.hi.encode(s);
1010     }
1011 }
1012 impl<D: Decoder> Decodable<D> for Span {
decode(s: &mut D) -> Span1013     default fn decode(s: &mut D) -> Span {
1014         let lo = Decodable::decode(s);
1015         let hi = Decodable::decode(s);
1016 
1017         Span::new(lo, hi, SyntaxContext::root(), None)
1018     }
1019 }
1020 
1021 /// Insert `source_map` into the session globals for the duration of the
1022 /// closure's execution.
set_source_map<T, F: FnOnce() -> T>(source_map: Lrc<SourceMap>, f: F) -> T1023 pub fn set_source_map<T, F: FnOnce() -> T>(source_map: Lrc<SourceMap>, f: F) -> T {
1024     with_session_globals(|session_globals| {
1025         *session_globals.source_map.borrow_mut() = Some(source_map);
1026     });
1027     struct ClearSourceMap;
1028     impl Drop for ClearSourceMap {
1029         fn drop(&mut self) {
1030             with_session_globals(|session_globals| {
1031                 session_globals.source_map.borrow_mut().take();
1032             });
1033         }
1034     }
1035 
1036     let _guard = ClearSourceMap;
1037     f()
1038 }
1039 
1040 impl fmt::Debug for Span {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1041     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1042         // Use the global `SourceMap` to print the span. If that's not
1043         // available, fall back to printing the raw values.
1044 
1045         fn fallback(span: Span, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1046             f.debug_struct("Span")
1047                 .field("lo", &span.lo())
1048                 .field("hi", &span.hi())
1049                 .field("ctxt", &span.ctxt())
1050                 .finish()
1051         }
1052 
1053         if SESSION_GLOBALS.is_set() {
1054             with_session_globals(|session_globals| {
1055                 if let Some(source_map) = &*session_globals.source_map.borrow() {
1056                     write!(f, "{} ({:?})", source_map.span_to_diagnostic_string(*self), self.ctxt())
1057                 } else {
1058                     fallback(*self, f)
1059                 }
1060             })
1061         } else {
1062             fallback(*self, f)
1063         }
1064     }
1065 }
1066 
1067 impl fmt::Debug for SpanData {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1068     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1069         fmt::Debug::fmt(&Span::new(self.lo, self.hi, self.ctxt, self.parent), f)
1070     }
1071 }
1072 
1073 /// Identifies an offset of a multi-byte character in a `SourceFile`.
1074 #[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
1075 pub struct MultiByteChar {
1076     /// The absolute offset of the character in the `SourceMap`.
1077     pub pos: BytePos,
1078     /// The number of bytes, `>= 2`.
1079     pub bytes: u8,
1080 }
1081 
1082 /// Identifies an offset of a non-narrow character in a `SourceFile`.
1083 #[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
1084 pub enum NonNarrowChar {
1085     /// Represents a zero-width character.
1086     ZeroWidth(BytePos),
1087     /// Represents a wide (full-width) character.
1088     Wide(BytePos),
1089     /// Represents a tab character, represented visually with a width of 4 characters.
1090     Tab(BytePos),
1091 }
1092 
1093 impl NonNarrowChar {
new(pos: BytePos, width: usize) -> Self1094     fn new(pos: BytePos, width: usize) -> Self {
1095         match width {
1096             0 => NonNarrowChar::ZeroWidth(pos),
1097             2 => NonNarrowChar::Wide(pos),
1098             4 => NonNarrowChar::Tab(pos),
1099             _ => panic!("width {width} given for non-narrow character"),
1100         }
1101     }
1102 
1103     /// Returns the absolute offset of the character in the `SourceMap`.
pos(&self) -> BytePos1104     pub fn pos(&self) -> BytePos {
1105         match *self {
1106             NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p,
1107         }
1108     }
1109 
1110     /// Returns the width of the character, 0 (zero-width) or 2 (wide).
width(&self) -> usize1111     pub fn width(&self) -> usize {
1112         match *self {
1113             NonNarrowChar::ZeroWidth(_) => 0,
1114             NonNarrowChar::Wide(_) => 2,
1115             NonNarrowChar::Tab(_) => 4,
1116         }
1117     }
1118 }
1119 
1120 impl Add<BytePos> for NonNarrowChar {
1121     type Output = Self;
1122 
add(self, rhs: BytePos) -> Self1123     fn add(self, rhs: BytePos) -> Self {
1124         match self {
1125             NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
1126             NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
1127             NonNarrowChar::Tab(pos) => NonNarrowChar::Tab(pos + rhs),
1128         }
1129     }
1130 }
1131 
1132 impl Sub<BytePos> for NonNarrowChar {
1133     type Output = Self;
1134 
sub(self, rhs: BytePos) -> Self1135     fn sub(self, rhs: BytePos) -> Self {
1136         match self {
1137             NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
1138             NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
1139             NonNarrowChar::Tab(pos) => NonNarrowChar::Tab(pos - rhs),
1140         }
1141     }
1142 }
1143 
1144 /// Identifies an offset of a character that was normalized away from `SourceFile`.
1145 #[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
1146 pub struct NormalizedPos {
1147     /// The absolute offset of the character in the `SourceMap`.
1148     pub pos: BytePos,
1149     /// The difference between original and normalized string at position.
1150     pub diff: u32,
1151 }
1152 
1153 #[derive(PartialEq, Eq, Clone, Debug)]
1154 pub enum ExternalSource {
1155     /// No external source has to be loaded, since the `SourceFile` represents a local crate.
1156     Unneeded,
1157     Foreign {
1158         kind: ExternalSourceKind,
1159         /// Index of the file inside metadata.
1160         metadata_index: u32,
1161     },
1162 }
1163 
1164 /// The state of the lazy external source loading mechanism of a `SourceFile`.
1165 #[derive(PartialEq, Eq, Clone, Debug)]
1166 pub enum ExternalSourceKind {
1167     /// The external source has been loaded already.
1168     Present(Lrc<String>),
1169     /// No attempt has been made to load the external source.
1170     AbsentOk,
1171     /// A failed attempt has been made to load the external source.
1172     AbsentErr,
1173     Unneeded,
1174 }
1175 
1176 impl ExternalSource {
get_source(&self) -> Option<&Lrc<String>>1177     pub fn get_source(&self) -> Option<&Lrc<String>> {
1178         match self {
1179             ExternalSource::Foreign { kind: ExternalSourceKind::Present(ref src), .. } => Some(src),
1180             _ => None,
1181         }
1182     }
1183 }
1184 
// NOTE(review): the name suggests this signals an offset that does not fit
// the position type; no use of it is visible here — confirm.
#[derive(Debug)]
pub struct OffsetOverflowError;
1187 
1188 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Encodable, Decodable)]
1189 #[derive(HashStable_Generic)]
1190 pub enum SourceFileHashAlgorithm {
1191     Md5,
1192     Sha1,
1193     Sha256,
1194 }
1195 
1196 impl FromStr for SourceFileHashAlgorithm {
1197     type Err = ();
1198 
from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()>1199     fn from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()> {
1200         match s {
1201             "md5" => Ok(SourceFileHashAlgorithm::Md5),
1202             "sha1" => Ok(SourceFileHashAlgorithm::Sha1),
1203             "sha256" => Ok(SourceFileHashAlgorithm::Sha256),
1204             _ => Err(()),
1205         }
1206     }
1207 }
1208 
1209 /// The hash of the on-disk source file used for debug info.
1210 #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
1211 #[derive(HashStable_Generic, Encodable, Decodable)]
1212 pub struct SourceFileHash {
1213     pub kind: SourceFileHashAlgorithm,
1214     value: [u8; 32],
1215 }
1216 
1217 impl SourceFileHash {
new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash1218     pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash {
1219         let mut hash = SourceFileHash { kind, value: Default::default() };
1220         let len = hash.hash_len();
1221         let value = &mut hash.value[..len];
1222         let data = src.as_bytes();
1223         match kind {
1224             SourceFileHashAlgorithm::Md5 => {
1225                 value.copy_from_slice(&Md5::digest(data));
1226             }
1227             SourceFileHashAlgorithm::Sha1 => {
1228                 value.copy_from_slice(&Sha1::digest(data));
1229             }
1230             SourceFileHashAlgorithm::Sha256 => {
1231                 value.copy_from_slice(&Sha256::digest(data));
1232             }
1233         }
1234         hash
1235     }
1236 
1237     /// Check if the stored hash matches the hash of the string.
matches(&self, src: &str) -> bool1238     pub fn matches(&self, src: &str) -> bool {
1239         Self::new(self.kind, src) == *self
1240     }
1241 
1242     /// The bytes of the hash.
hash_bytes(&self) -> &[u8]1243     pub fn hash_bytes(&self) -> &[u8] {
1244         let len = self.hash_len();
1245         &self.value[..len]
1246     }
1247 
hash_len(&self) -> usize1248     fn hash_len(&self) -> usize {
1249         match self.kind {
1250             SourceFileHashAlgorithm::Md5 => 16,
1251             SourceFileHashAlgorithm::Sha1 => 20,
1252             SourceFileHashAlgorithm::Sha256 => 32,
1253         }
1254     }
1255 }
1256 
1257 #[derive(Clone)]
1258 pub enum SourceFileLines {
1259     /// The source file lines, in decoded (random-access) form.
1260     Lines(Vec<BytePos>),
1261 
1262     /// The source file lines, in undecoded difference list form.
1263     Diffs(SourceFileDiffs),
1264 }
1265 
1266 impl SourceFileLines {
is_lines(&self) -> bool1267     pub fn is_lines(&self) -> bool {
1268         matches!(self, SourceFileLines::Lines(_))
1269     }
1270 }
1271 
1272 /// The source file lines in difference list form. This matches the form
1273 /// used within metadata, which saves space by exploiting the fact that the
1274 /// lines list is sorted and individual lines are usually not that long.
1275 ///
1276 /// We read it directly from metadata and only decode it into `Lines` form
1277 /// when necessary. This is a significant performance win, especially for
1278 /// small crates where very little of `std`'s metadata is used.
1279 #[derive(Clone)]
1280 pub struct SourceFileDiffs {
1281     /// Position of the first line. Note that this is always encoded as a
1282     /// `BytePos` because it is often much larger than any of the
1283     /// differences.
1284     line_start: BytePos,
1285 
1286     /// Always 1, 2, or 4. Always as small as possible, while being big
1287     /// enough to hold the length of the longest line in the source file.
1288     /// The 1 case is by far the most common.
1289     bytes_per_diff: usize,
1290 
1291     /// The number of diffs encoded in `raw_diffs`. Always one less than
1292     /// the number of lines in the source file.
1293     num_diffs: usize,
1294 
1295     /// The diffs in "raw" form. Each segment of `bytes_per_diff` length
1296     /// encodes one little-endian diff. Note that they aren't LEB128
1297     /// encoded. This makes for much faster decoding. Besides, the
1298     /// bytes_per_diff==1 case is by far the most common, and LEB128
1299     /// encoding has no effect on that case.
1300     raw_diffs: Vec<u8>,
1301 }
1302 
1303 /// A single source in the [`SourceMap`].
1304 pub struct SourceFile {
1305     /// The name of the file that the source came from. Source that doesn't
1306     /// originate from files has names between angle brackets by convention
1307     /// (e.g., `<anon>`).
1308     pub name: FileName,
1309     /// The complete source code.
1310     pub src: Option<Lrc<String>>,
1311     /// The source code's hash.
1312     pub src_hash: SourceFileHash,
1313     /// The external source code (used for external crates, which will have a `None`
1314     /// value as `self.src`.
1315     pub external_src: Lock<ExternalSource>,
1316     /// The start position of this source in the `SourceMap`.
1317     pub start_pos: BytePos,
1318     /// The end position of this source in the `SourceMap`.
1319     pub end_pos: BytePos,
1320     /// Locations of lines beginnings in the source code.
1321     pub lines: Lock<SourceFileLines>,
1322     /// Locations of multi-byte characters in the source code.
1323     pub multibyte_chars: Vec<MultiByteChar>,
1324     /// Width of characters that are not narrow in the source code.
1325     pub non_narrow_chars: Vec<NonNarrowChar>,
1326     /// Locations of characters removed during normalization.
1327     pub normalized_pos: Vec<NormalizedPos>,
1328     /// A hash of the filename, used for speeding up hashing in incremental compilation.
1329     pub name_hash: Hash128,
1330     /// Indicates which crate this `SourceFile` was imported from.
1331     pub cnum: CrateNum,
1332 }
1333 
1334 impl Clone for SourceFile {
clone(&self) -> Self1335     fn clone(&self) -> Self {
1336         Self {
1337             name: self.name.clone(),
1338             src: self.src.clone(),
1339             src_hash: self.src_hash,
1340             external_src: Lock::new(self.external_src.borrow().clone()),
1341             start_pos: self.start_pos,
1342             end_pos: self.end_pos,
1343             lines: Lock::new(self.lines.borrow().clone()),
1344             multibyte_chars: self.multibyte_chars.clone(),
1345             non_narrow_chars: self.non_narrow_chars.clone(),
1346             normalized_pos: self.normalized_pos.clone(),
1347             name_hash: self.name_hash,
1348             cnum: self.cnum,
1349         }
1350     }
1351 }
1352 
1353 impl<S: Encoder> Encodable<S> for SourceFile {
encode(&self, s: &mut S)1354     fn encode(&self, s: &mut S) {
1355         self.name.encode(s);
1356         self.src_hash.encode(s);
1357         self.start_pos.encode(s);
1358         self.end_pos.encode(s);
1359 
1360         // We are always in `Lines` form by the time we reach here.
1361         assert!(self.lines.borrow().is_lines());
1362         self.lines(|lines| {
1363             // Store the length.
1364             s.emit_u32(lines.len() as u32);
1365 
1366             // Compute and store the difference list.
1367             if lines.len() != 0 {
1368                 let max_line_length = if lines.len() == 1 {
1369                     0
1370                 } else {
1371                     lines
1372                         .array_windows()
1373                         .map(|&[fst, snd]| snd - fst)
1374                         .map(|bp| bp.to_usize())
1375                         .max()
1376                         .unwrap()
1377                 };
1378 
1379                 let bytes_per_diff: usize = match max_line_length {
1380                     0..=0xFF => 1,
1381                     0x100..=0xFFFF => 2,
1382                     _ => 4,
1383                 };
1384 
1385                 // Encode the number of bytes used per diff.
1386                 s.emit_u8(bytes_per_diff as u8);
1387 
1388                 // Encode the first element.
1389                 lines[0].encode(s);
1390 
1391                 // Encode the difference list.
1392                 let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst);
1393                 let num_diffs = lines.len() - 1;
1394                 let mut raw_diffs;
1395                 match bytes_per_diff {
1396                     1 => {
1397                         raw_diffs = Vec::with_capacity(num_diffs);
1398                         for diff in diff_iter {
1399                             raw_diffs.push(diff.0 as u8);
1400                         }
1401                     }
1402                     2 => {
1403                         raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
1404                         for diff in diff_iter {
1405                             raw_diffs.extend_from_slice(&(diff.0 as u16).to_le_bytes());
1406                         }
1407                     }
1408                     4 => {
1409                         raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
1410                         for diff in diff_iter {
1411                             raw_diffs.extend_from_slice(&(diff.0).to_le_bytes());
1412                         }
1413                     }
1414                     _ => unreachable!(),
1415                 }
1416                 s.emit_raw_bytes(&raw_diffs);
1417             }
1418         });
1419 
1420         self.multibyte_chars.encode(s);
1421         self.non_narrow_chars.encode(s);
1422         self.name_hash.encode(s);
1423         self.normalized_pos.encode(s);
1424         self.cnum.encode(s);
1425     }
1426 }
1427 
1428 impl<D: Decoder> Decodable<D> for SourceFile {
decode(d: &mut D) -> SourceFile1429     fn decode(d: &mut D) -> SourceFile {
1430         let name: FileName = Decodable::decode(d);
1431         let src_hash: SourceFileHash = Decodable::decode(d);
1432         let start_pos: BytePos = Decodable::decode(d);
1433         let end_pos: BytePos = Decodable::decode(d);
1434         let lines = {
1435             let num_lines: u32 = Decodable::decode(d);
1436             if num_lines > 0 {
1437                 // Read the number of bytes used per diff.
1438                 let bytes_per_diff = d.read_u8() as usize;
1439 
1440                 // Read the first element.
1441                 let line_start: BytePos = Decodable::decode(d);
1442 
1443                 // Read the difference list.
1444                 let num_diffs = num_lines as usize - 1;
1445                 let raw_diffs = d.read_raw_bytes(bytes_per_diff * num_diffs).to_vec();
1446                 SourceFileLines::Diffs(SourceFileDiffs {
1447                     line_start,
1448                     bytes_per_diff,
1449                     num_diffs,
1450                     raw_diffs,
1451                 })
1452             } else {
1453                 SourceFileLines::Lines(vec![])
1454             }
1455         };
1456         let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d);
1457         let non_narrow_chars: Vec<NonNarrowChar> = Decodable::decode(d);
1458         let name_hash = Decodable::decode(d);
1459         let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d);
1460         let cnum: CrateNum = Decodable::decode(d);
1461         SourceFile {
1462             name,
1463             start_pos,
1464             end_pos,
1465             src: None,
1466             src_hash,
1467             // Unused - the metadata decoder will construct
1468             // a new SourceFile, filling in `external_src` properly
1469             external_src: Lock::new(ExternalSource::Unneeded),
1470             lines: Lock::new(lines),
1471             multibyte_chars,
1472             non_narrow_chars,
1473             normalized_pos,
1474             name_hash,
1475             cnum,
1476         }
1477     }
1478 }
1479 
1480 impl fmt::Debug for SourceFile {
fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result1481     fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
1482         write!(fmt, "SourceFile({:?})", self.name)
1483     }
1484 }
1485 
1486 impl SourceFile {
new( name: FileName, mut src: String, start_pos: BytePos, hash_kind: SourceFileHashAlgorithm, ) -> Self1487     pub fn new(
1488         name: FileName,
1489         mut src: String,
1490         start_pos: BytePos,
1491         hash_kind: SourceFileHashAlgorithm,
1492     ) -> Self {
1493         // Compute the file hash before any normalization.
1494         let src_hash = SourceFileHash::new(hash_kind, &src);
1495         let normalized_pos = normalize_src(&mut src, start_pos);
1496 
1497         let name_hash = {
1498             let mut hasher: StableHasher = StableHasher::new();
1499             name.hash(&mut hasher);
1500             hasher.finish()
1501         };
1502         let end_pos = start_pos.to_usize() + src.len();
1503         assert!(end_pos <= u32::MAX as usize);
1504 
1505         let (lines, multibyte_chars, non_narrow_chars) =
1506             analyze_source_file::analyze_source_file(&src, start_pos);
1507 
1508         SourceFile {
1509             name,
1510             src: Some(Lrc::new(src)),
1511             src_hash,
1512             external_src: Lock::new(ExternalSource::Unneeded),
1513             start_pos,
1514             end_pos: Pos::from_usize(end_pos),
1515             lines: Lock::new(SourceFileLines::Lines(lines)),
1516             multibyte_chars,
1517             non_narrow_chars,
1518             normalized_pos,
1519             name_hash,
1520             cnum: LOCAL_CRATE,
1521         }
1522     }
1523 
lines<F, R>(&self, f: F) -> R where F: FnOnce(&[BytePos]) -> R,1524     pub fn lines<F, R>(&self, f: F) -> R
1525     where
1526         F: FnOnce(&[BytePos]) -> R,
1527     {
1528         let mut guard = self.lines.borrow_mut();
1529         match &*guard {
1530             SourceFileLines::Lines(lines) => f(lines),
1531             SourceFileLines::Diffs(SourceFileDiffs {
1532                 mut line_start,
1533                 bytes_per_diff,
1534                 num_diffs,
1535                 raw_diffs,
1536             }) => {
1537                 // Convert from "diffs" form to "lines" form.
1538                 let num_lines = num_diffs + 1;
1539                 let mut lines = Vec::with_capacity(num_lines);
1540                 lines.push(line_start);
1541 
1542                 assert_eq!(*num_diffs, raw_diffs.len() / bytes_per_diff);
1543                 match bytes_per_diff {
1544                     1 => {
1545                         lines.extend(raw_diffs.into_iter().map(|&diff| {
1546                             line_start = line_start + BytePos(diff as u32);
1547                             line_start
1548                         }));
1549                     }
1550                     2 => {
1551                         lines.extend((0..*num_diffs).map(|i| {
1552                             let pos = bytes_per_diff * i;
1553                             let bytes = [raw_diffs[pos], raw_diffs[pos + 1]];
1554                             let diff = u16::from_le_bytes(bytes);
1555                             line_start = line_start + BytePos(diff as u32);
1556                             line_start
1557                         }));
1558                     }
1559                     4 => {
1560                         lines.extend((0..*num_diffs).map(|i| {
1561                             let pos = bytes_per_diff * i;
1562                             let bytes = [
1563                                 raw_diffs[pos],
1564                                 raw_diffs[pos + 1],
1565                                 raw_diffs[pos + 2],
1566                                 raw_diffs[pos + 3],
1567                             ];
1568                             let diff = u32::from_le_bytes(bytes);
1569                             line_start = line_start + BytePos(diff);
1570                             line_start
1571                         }));
1572                     }
1573                     _ => unreachable!(),
1574                 }
1575                 let res = f(&lines);
1576                 *guard = SourceFileLines::Lines(lines);
1577                 res
1578             }
1579         }
1580     }
1581 
1582     /// Returns the `BytePos` of the beginning of the current line.
line_begin_pos(&self, pos: BytePos) -> BytePos1583     pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
1584         let line_index = self.lookup_line(pos).unwrap();
1585         self.lines(|lines| lines[line_index])
1586     }
1587 
1588     /// Add externally loaded source.
1589     /// If the hash of the input doesn't match or no input is supplied via None,
1590     /// it is interpreted as an error and the corresponding enum variant is set.
1591     /// The return value signifies whether some kind of source is present.
add_external_src<F>(&self, get_src: F) -> bool where F: FnOnce() -> Option<String>,1592     pub fn add_external_src<F>(&self, get_src: F) -> bool
1593     where
1594         F: FnOnce() -> Option<String>,
1595     {
1596         if matches!(
1597             *self.external_src.borrow(),
1598             ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, .. }
1599         ) {
1600             let src = get_src();
1601             let mut external_src = self.external_src.borrow_mut();
1602             // Check that no-one else have provided the source while we were getting it
1603             if let ExternalSource::Foreign {
1604                 kind: src_kind @ ExternalSourceKind::AbsentOk, ..
1605             } = &mut *external_src
1606             {
1607                 if let Some(mut src) = src {
1608                     // The src_hash needs to be computed on the pre-normalized src.
1609                     if self.src_hash.matches(&src) {
1610                         normalize_src(&mut src, BytePos::from_usize(0));
1611                         *src_kind = ExternalSourceKind::Present(Lrc::new(src));
1612                         return true;
1613                     }
1614                 } else {
1615                     *src_kind = ExternalSourceKind::AbsentErr;
1616                 }
1617 
1618                 false
1619             } else {
1620                 self.src.is_some() || external_src.get_source().is_some()
1621             }
1622         } else {
1623             self.src.is_some() || self.external_src.borrow().get_source().is_some()
1624         }
1625     }
1626 
1627     /// Gets a line from the list of pre-computed line-beginnings.
1628     /// The line number here is 0-based.
get_line(&self, line_number: usize) -> Option<Cow<'_, str>>1629     pub fn get_line(&self, line_number: usize) -> Option<Cow<'_, str>> {
1630         fn get_until_newline(src: &str, begin: usize) -> &str {
1631             // We can't use `lines.get(line_number+1)` because we might
1632             // be parsing when we call this function and thus the current
1633             // line is the last one we have line info for.
1634             let slice = &src[begin..];
1635             match slice.find('\n') {
1636                 Some(e) => &slice[..e],
1637                 None => slice,
1638             }
1639         }
1640 
1641         let begin = {
1642             let line = self.lines(|lines| lines.get(line_number).copied())?;
1643             let begin: BytePos = line - self.start_pos;
1644             begin.to_usize()
1645         };
1646 
1647         if let Some(ref src) = self.src {
1648             Some(Cow::from(get_until_newline(src, begin)))
1649         } else {
1650             self.external_src
1651                 .borrow()
1652                 .get_source()
1653                 .map(|src| Cow::Owned(String::from(get_until_newline(src, begin))))
1654         }
1655     }
1656 
    /// Returns the result of calling `is_real()` on this file's `name`.
    pub fn is_real_file(&self) -> bool {
        self.name.is_real()
    }
1660 
    /// Returns `true` when this file holds no source text of its own, i.e.
    /// `self.src` is `None`.
    ///
    /// `SourceFile::new` always stores `Some(src)`, whereas the `Decodable`
    /// impl creates files with `src: None`.
    #[inline]
    pub fn is_imported(&self) -> bool {
        self.src.is_none()
    }
1665 
    /// Returns the number of line-start positions recorded for this file.
    pub fn count_lines(&self) -> usize {
        self.lines(|lines| lines.len())
    }
1669 
1670     /// Finds the line containing the given position. The return value is the
1671     /// index into the `lines` array of this `SourceFile`, not the 1-based line
1672     /// number. If the source_file is empty or the position is located before the
1673     /// first line, `None` is returned.
lookup_line(&self, pos: BytePos) -> Option<usize>1674     pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
1675         self.lines(|lines| lines.partition_point(|x| x <= &pos).checked_sub(1))
1676     }
1677 
line_bounds(&self, line_index: usize) -> Range<BytePos>1678     pub fn line_bounds(&self, line_index: usize) -> Range<BytePos> {
1679         if self.is_empty() {
1680             return self.start_pos..self.end_pos;
1681         }
1682 
1683         self.lines(|lines| {
1684             assert!(line_index < lines.len());
1685             if line_index == (lines.len() - 1) {
1686                 lines[line_index]..self.end_pos
1687             } else {
1688                 lines[line_index]..lines[line_index + 1]
1689             }
1690         })
1691     }
1692 
1693     /// Returns whether or not the file contains the given `SourceMap` byte
1694     /// position. The position one past the end of the file is considered to be
1695     /// contained by the file. This implies that files for which `is_empty`
1696     /// returns true still contain one byte position according to this function.
1697     #[inline]
contains(&self, byte_pos: BytePos) -> bool1698     pub fn contains(&self, byte_pos: BytePos) -> bool {
1699         byte_pos >= self.start_pos && byte_pos <= self.end_pos
1700     }
1701 
    /// Returns `true` if the file spans zero bytes, i.e. `start_pos == end_pos`.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.start_pos == self.end_pos
    }
1706 
1707     /// Calculates the original byte position relative to the start of the file
1708     /// based on the given byte position.
original_relative_byte_pos(&self, pos: BytePos) -> BytePos1709     pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos {
1710         // Diff before any records is 0. Otherwise use the previously recorded
1711         // diff as that applies to the following characters until a new diff
1712         // is recorded.
1713         let diff = match self.normalized_pos.binary_search_by(|np| np.pos.cmp(&pos)) {
1714             Ok(i) => self.normalized_pos[i].diff,
1715             Err(i) if i == 0 => 0,
1716             Err(i) => self.normalized_pos[i - 1].diff,
1717         };
1718 
1719         BytePos::from_u32(pos.0 - self.start_pos.0 + diff)
1720     }
1721 
1722     /// Calculates a normalized byte position from a byte offset relative to the
1723     /// start of the file.
1724     ///
1725     /// When we get an inline assembler error from LLVM during codegen, we
1726     /// import the expanded assembly code as a new `SourceFile`, which can then
1727     /// be used for error reporting with spans. However the byte offsets given
1728     /// to us by LLVM are relative to the start of the original buffer, not the
1729     /// normalized one. Hence we need to convert those offsets to the normalized
1730     /// form when constructing spans.
normalized_byte_pos(&self, offset: u32) -> BytePos1731     pub fn normalized_byte_pos(&self, offset: u32) -> BytePos {
1732         let diff = match self
1733             .normalized_pos
1734             .binary_search_by(|np| (np.pos.0 + np.diff).cmp(&(self.start_pos.0 + offset)))
1735         {
1736             Ok(i) => self.normalized_pos[i].diff,
1737             Err(i) if i == 0 => 0,
1738             Err(i) => self.normalized_pos[i - 1].diff,
1739         };
1740 
1741         BytePos::from_u32(self.start_pos.0 + offset - diff)
1742     }
1743 
1744     /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`.
bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos1745     pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
1746         // The number of extra bytes due to multibyte chars in the `SourceFile`.
1747         let mut total_extra_bytes = 0;
1748 
1749         for mbc in self.multibyte_chars.iter() {
1750             debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
1751             if mbc.pos < bpos {
1752                 // Every character is at least one byte, so we only
1753                 // count the actual extra bytes.
1754                 total_extra_bytes += mbc.bytes as u32 - 1;
1755                 // We should never see a byte position in the middle of a
1756                 // character.
1757                 assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
1758             } else {
1759                 break;
1760             }
1761         }
1762 
1763         assert!(self.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32());
1764         CharPos(bpos.to_usize() - self.start_pos.to_usize() - total_extra_bytes as usize)
1765     }
1766 
1767     /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
1768     /// given `BytePos`.
lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos)1769     pub fn lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos) {
1770         let chpos = self.bytepos_to_file_charpos(pos);
1771         match self.lookup_line(pos) {
1772             Some(a) => {
1773                 let line = a + 1; // Line numbers start at 1
1774                 let linebpos = self.lines(|lines| lines[a]);
1775                 let linechpos = self.bytepos_to_file_charpos(linebpos);
1776                 let col = chpos - linechpos;
1777                 debug!("byte pos {:?} is on the line at byte pos {:?}", pos, linebpos);
1778                 debug!("char pos {:?} is on the line at char pos {:?}", chpos, linechpos);
1779                 debug!("byte is on line: {}", line);
1780                 assert!(chpos >= linechpos);
1781                 (line, col)
1782             }
1783             None => (0, chpos),
1784         }
1785     }
1786 
1787     /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
1788     /// column offset when displayed, for a given `BytePos`.
lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize)1789     pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
1790         let (line, col_or_chpos) = self.lookup_file_pos(pos);
1791         if line > 0 {
1792             let col = col_or_chpos;
1793             let linebpos = self.lines(|lines| lines[line - 1]);
1794             let col_display = {
1795                 let start_width_idx = self
1796                     .non_narrow_chars
1797                     .binary_search_by_key(&linebpos, |x| x.pos())
1798                     .unwrap_or_else(|x| x);
1799                 let end_width_idx = self
1800                     .non_narrow_chars
1801                     .binary_search_by_key(&pos, |x| x.pos())
1802                     .unwrap_or_else(|x| x);
1803                 let special_chars = end_width_idx - start_width_idx;
1804                 let non_narrow: usize = self.non_narrow_chars[start_width_idx..end_width_idx]
1805                     .iter()
1806                     .map(|x| x.width())
1807                     .sum();
1808                 col.0 - special_chars + non_narrow
1809             };
1810             (line, col, col_display)
1811         } else {
1812             let chpos = col_or_chpos;
1813             let col_display = {
1814                 let end_width_idx = self
1815                     .non_narrow_chars
1816                     .binary_search_by_key(&pos, |x| x.pos())
1817                     .unwrap_or_else(|x| x);
1818                 let non_narrow: usize =
1819                     self.non_narrow_chars[0..end_width_idx].iter().map(|x| x.width()).sum();
1820                 chpos.0 - end_width_idx + non_narrow
1821             };
1822             (0, chpos, col_display)
1823         }
1824     }
1825 }
1826 
1827 /// Normalizes the source code and records the normalizations.
normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos>1828 fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
1829     let mut normalized_pos = vec![];
1830     remove_bom(src, &mut normalized_pos);
1831     normalize_newlines(src, &mut normalized_pos);
1832 
1833     // Offset all the positions by start_pos to match the final file positions.
1834     for np in &mut normalized_pos {
1835         np.pos.0 += start_pos.0;
1836     }
1837 
1838     normalized_pos
1839 }
1840 
1841 /// Removes UTF-8 BOM, if any.
remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>)1842 fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
1843     if src.starts_with('\u{feff}') {
1844         src.drain(..3);
1845         normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 });
1846     }
1847 }
1848 
1849 /// Replaces `\r\n` with `\n` in-place in `src`.
1850 ///
1851 /// Returns error if there's a lone `\r` in the string.
normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>)1852 fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
1853     if !src.as_bytes().contains(&b'\r') {
1854         return;
1855     }
1856 
1857     // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
1858     // While we *can* call `as_mut_vec` and do surgery on the live string
1859     // directly, let's rather steal the contents of `src`. This makes the code
1860     // safe even if a panic occurs.
1861 
1862     let mut buf = std::mem::replace(src, String::new()).into_bytes();
1863     let mut gap_len = 0;
1864     let mut tail = buf.as_mut_slice();
1865     let mut cursor = 0;
1866     let original_gap = normalized_pos.last().map_or(0, |l| l.diff);
1867     loop {
1868         let idx = match find_crlf(&tail[gap_len..]) {
1869             None => tail.len(),
1870             Some(idx) => idx + gap_len,
1871         };
1872         tail.copy_within(gap_len..idx, 0);
1873         tail = &mut tail[idx - gap_len..];
1874         if tail.len() == gap_len {
1875             break;
1876         }
1877         cursor += idx - gap_len;
1878         gap_len += 1;
1879         normalized_pos.push(NormalizedPos {
1880             pos: BytePos::from_usize(cursor + 1),
1881             diff: original_gap + gap_len as u32,
1882         });
1883     }
1884 
1885     // Account for removed `\r`.
1886     // After `set_len`, `buf` is guaranteed to contain utf-8 again.
1887     let new_len = buf.len() - gap_len;
1888     unsafe {
1889         buf.set_len(new_len);
1890         *src = String::from_utf8_unchecked(buf);
1891     }
1892 
1893     fn find_crlf(src: &[u8]) -> Option<usize> {
1894         let mut search_idx = 0;
1895         while let Some(idx) = find_cr(&src[search_idx..]) {
1896             if src[search_idx..].get(idx + 1) != Some(&b'\n') {
1897                 search_idx += idx + 1;
1898                 continue;
1899             }
1900             return Some(search_idx + idx);
1901         }
1902         None
1903     }
1904 
1905     fn find_cr(src: &[u8]) -> Option<usize> {
1906         src.iter().position(|&b| b == b'\r')
1907     }
1908 }
1909 
1910 // _____________________________________________________________________________
1911 // Pos, BytePos, CharPos
1912 //
1913 
/// Conversions between a position type and plain integers.
///
/// Implemented for `BytePos` and `CharPos` through the `impl_pos!` macro,
/// where every conversion is a plain `as` cast of the wrapped value.
pub trait Pos {
    /// Creates a position from a `usize`.
    fn from_usize(n: usize) -> Self;
    /// Returns the position as a `usize`.
    fn to_usize(&self) -> usize;
    /// Creates a position from a `u32`.
    fn from_u32(n: u32) -> Self;
    /// Returns the position as a `u32`.
    fn to_u32(&self) -> u32;
}
1920 
/// Declares tuple-struct position types and implements `Pos`, `Add` and `Sub`
/// for each of them.
///
/// Every input item has the shape `<attrs> <vis> struct Name(<vis> Inner);`.
/// The struct is emitted unchanged; the `Pos` conversions are `as` casts
/// to/from `Inner`, and `+` / `-` operate on the wrapped values.
macro_rules! impl_pos {
    (
        $(
            $(#[$attr:meta])*
            $vis:vis struct $ident:ident($inner_vis:vis $inner_ty:ty);
        )*
    ) => {
        $(
            $(#[$attr])*
            $vis struct $ident($inner_vis $inner_ty);

            impl Pos for $ident {
                #[inline(always)]
                fn from_usize(n: usize) -> Self {
                    Self(n as $inner_ty)
                }

                #[inline(always)]
                fn to_usize(&self) -> usize {
                    self.0 as usize
                }

                #[inline(always)]
                fn from_u32(n: u32) -> Self {
                    Self(n as $inner_ty)
                }

                #[inline(always)]
                fn to_u32(&self) -> u32 {
                    self.0 as u32
                }
            }

            impl Add for $ident {
                type Output = Self;

                #[inline(always)]
                fn add(self, rhs: Self) -> Self {
                    Self(self.0 + rhs.0)
                }
            }

            impl Sub for $ident {
                type Output = Self;

                #[inline(always)]
                fn sub(self, rhs: Self) -> Self {
                    Self(self.0 - rhs.0)
                }
            }
        )*
    };
}
1974 
impl_pos! {
    /// A byte offset.
    ///
    /// Keep this small (currently 32 bits), as the AST contains a lot of them.
    /// `Pos`, `Add` and `Sub` are implemented by `impl_pos!`.
    #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
    pub struct BytePos(pub u32);

    /// A character offset.
    ///
    /// Because of multibyte UTF-8 characters, a byte offset
    /// is not equivalent to a character offset. The [`SourceMap`] will convert [`BytePos`]
    /// values to `CharPos` values as necessary.
    /// `Pos`, `Add` and `Sub` are implemented by `impl_pos!`.
    #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
    pub struct CharPos(pub usize);
}
1990 
1991 impl<S: Encoder> Encodable<S> for BytePos {
encode(&self, s: &mut S)1992     fn encode(&self, s: &mut S) {
1993         s.emit_u32(self.0);
1994     }
1995 }
1996 
1997 impl<D: Decoder> Decodable<D> for BytePos {
decode(d: &mut D) -> BytePos1998     fn decode(d: &mut D) -> BytePos {
1999         BytePos(d.read_u32())
2000     }
2001 }
2002 
2003 // _____________________________________________________________________________
2004 // Loc, SourceFileAndLine, SourceFileAndBytePos
2005 //
2006 
/// A source code location used for error reporting.
///
/// Combines a shared handle to the source file with a line number and two
/// flavours of column offset.
#[derive(Debug, Clone)]
pub struct Loc {
    /// Information about the original source.
    pub file: Lrc<SourceFile>,
    /// The (1-based) line number.
    pub line: usize,
    /// The (0-based) column offset, counted in characters (`CharPos`).
    pub col: CharPos,
    /// The (0-based) column offset when displayed.
    pub col_display: usize,
}
2019 
// Used to be structural records.
/// A source file paired with the index of one of its lines.
#[derive(Debug)]
pub struct SourceFileAndLine {
    /// The source file the line belongs to.
    pub sf: Lrc<SourceFile>,
    /// Index of line, starting from 0.
    pub line: usize,
}
/// A source file paired with a byte position.
#[derive(Debug)]
pub struct SourceFileAndBytePos {
    /// The source file the position refers to.
    pub sf: Lrc<SourceFile>,
    /// The byte position.
    // NOTE(review): not visible here whether `pos` is absolute or relative to
    // `sf.start_pos` — confirm against the code that constructs this type.
    pub pos: BytePos,
}
2032 
/// The part of a single line that is covered by a span, as a half-open
/// column range `start_col..end_col`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct LineInfo {
    /// Index of line, starting from 0.
    pub line_index: usize,

    /// Column in line where span begins, starting from 0.
    pub start_col: CharPos,

    /// Column in line where span ends, starting from 0, exclusive.
    pub end_col: CharPos,
}
2044 
/// A source file together with a list of per-line [`LineInfo`] entries.
pub struct FileLines {
    /// The file the lines belong to.
    pub file: Lrc<SourceFile>,
    /// One entry per line; see [`LineInfo`] for the meaning of its fields.
    pub lines: Vec<LineInfo>,
}
2049 
/// Globally replaceable hook taking a `LocalDefId`; the default is a no-op closure.
// NOTE(review): presumably invoked when a span's parent definition is read, so
// that another crate can install dependency tracking — confirm at the call sites.
pub static SPAN_TRACK: AtomicRef<fn(LocalDefId)> = AtomicRef::new(&((|_| {}) as fn(_)));
2051 
2052 // _____________________________________________________________________________
2053 // SpanLinesError, SpanSnippetError, DistinctSources, MalformedSourceMapPositions
2054 //
2055 
/// Result of an operation that yields [`FileLines`] or fails with a [`SpanLinesError`].
pub type FileLinesResult = Result<FileLines, SpanLinesError>;

/// Error produced when the lines of a span cannot be determined.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum SpanLinesError {
    /// Carries the two (file name, position) endpoints involved; see [`DistinctSources`].
    DistinctSources(Box<DistinctSources>),
}
2062 
/// Error describing why a snippet for a span could not be obtained.
// NOTE(review): the variant descriptions below are inferred from the variant
// names and payload types only — confirm against the code that produces them.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum SpanSnippetError {
    /// The span itself is considered ill-formed; carries the offending span.
    IllFormedSpan(Span),
    /// Carries the two (file name, position) endpoints involved; see [`DistinctSources`].
    DistinctSources(Box<DistinctSources>),
    /// Carries the positions and source length involved; see [`MalformedSourceMapPositions`].
    MalformedForSourcemap(MalformedSourceMapPositions),
    /// No source text is available for the named file.
    SourceNotAvailable { filename: FileName },
}
2070 
/// Payload of the `DistinctSources` error variants: a file name and position
/// for each of the two endpoints.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DistinctSources {
    /// File name and position of the beginning.
    pub begin: (FileName, BytePos),
    /// File name and position of the end.
    pub end: (FileName, BytePos),
}
2076 
/// Payload of `SpanSnippetError::MalformedForSourcemap`: the file name, the
/// source length and the begin/end positions involved.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct MalformedSourceMapPositions {
    /// Name of the file involved.
    pub name: FileName,
    /// Length of the source.
    // NOTE(review): presumably measured in bytes — confirm at the construction site.
    pub source_len: usize,
    /// The begin position.
    pub begin_pos: BytePos,
    /// The end position.
    pub end_pos: BytePos,
}
2084 
/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct InnerSpan {
    pub start: usize,
    pub end: usize,
}

impl InnerSpan {
    /// Creates an `InnerSpan` with the given `start` and `end`, stored as passed.
    pub fn new(start: usize, end: usize) -> InnerSpan {
        Self { start, end }
    }
}
2097 
/// Requirements for a `StableHashingContext` to be used in this crate.
///
/// This is a hack to allow using the [`HashStable_Generic`] derive macro
/// instead of implementing everything in rustc_middle.
pub trait HashStableContext {
    /// Returns the `DefPathHash` for `def_id`.
    fn def_path_hash(&self, def_id: DefId) -> DefPathHash;
    /// Whether spans take part in hashing. When this returns `false`,
    /// `Span::hash_stable` feeds nothing into the hasher.
    fn hash_spans(&self) -> bool;
    /// Accesses `sess.opts.unstable_opts.incremental_ignore_spans` since
    /// we don't have easy access to a `Session`
    fn unstable_opts_incremental_ignore_spans(&self) -> bool;
    /// Returns the span of the definition `def_id`. `Span::hash_stable` uses
    /// it to hash spans relative to their parent definition.
    fn def_span(&self, def_id: LocalDefId) -> Span;
    /// Resolves `span` to its file plus, in this order as destructured by
    /// `Span::hash_stable`, the low line, low column, high line and high column.
    /// `None` makes `Span::hash_stable` treat the span as invalid.
    fn span_data_to_lines_and_cols(
        &mut self,
        span: &SpanData,
    ) -> Option<(Lrc<SourceFile>, usize, BytePos, usize, BytePos)>;
    /// Returns the `HashingControls` currently in effect.
    fn hashing_controls(&self) -> HashingControls;
}
2115 
2116 impl<CTX> HashStable<CTX> for Span
2117 where
2118     CTX: HashStableContext,
2119 {
2120     /// Hashes a span in a stable way. We can't directly hash the span's `BytePos`
2121     /// fields (that would be similar to hashing pointers, since those are just
2122     /// offsets into the `SourceMap`). Instead, we hash the (file name, line, column)
2123     /// triple, which stays the same even if the containing `SourceFile` has moved
2124     /// within the `SourceMap`.
2125     ///
2126     /// Also note that we are hashing byte offsets for the column, not unicode
2127     /// codepoint offsets. For the purpose of the hash that's sufficient.
2128     /// Also, hashing filenames is expensive so we avoid doing it twice when the
2129     /// span starts and ends in the same file, which is almost always the case.
hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher)2130     fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
2131         const TAG_VALID_SPAN: u8 = 0;
2132         const TAG_INVALID_SPAN: u8 = 1;
2133         const TAG_RELATIVE_SPAN: u8 = 2;
2134 
2135         if !ctx.hash_spans() {
2136             return;
2137         }
2138 
2139         let span = self.data_untracked();
2140         span.ctxt.hash_stable(ctx, hasher);
2141         span.parent.hash_stable(ctx, hasher);
2142 
2143         if span.is_dummy() {
2144             Hash::hash(&TAG_INVALID_SPAN, hasher);
2145             return;
2146         }
2147 
2148         if let Some(parent) = span.parent {
2149             let def_span = ctx.def_span(parent).data_untracked();
2150             if def_span.contains(span) {
2151                 // This span is enclosed in a definition: only hash the relative position.
2152                 Hash::hash(&TAG_RELATIVE_SPAN, hasher);
2153                 (span.lo - def_span.lo).to_u32().hash_stable(ctx, hasher);
2154                 (span.hi - def_span.lo).to_u32().hash_stable(ctx, hasher);
2155                 return;
2156             }
2157         }
2158 
2159         // If this is not an empty or invalid span, we want to hash the last
2160         // position that belongs to it, as opposed to hashing the first
2161         // position past it.
2162         let Some((file, line_lo, col_lo, line_hi, col_hi)) = ctx.span_data_to_lines_and_cols(&span) else {
2163             Hash::hash(&TAG_INVALID_SPAN, hasher);
2164             return;
2165         };
2166 
2167         Hash::hash(&TAG_VALID_SPAN, hasher);
2168         Hash::hash(&file.name_hash, hasher);
2169 
2170         // Hash both the length and the end location (line/column) of a span. If we
2171         // hash only the length, for example, then two otherwise equal spans with
2172         // different end locations will have the same hash. This can cause a problem
2173         // during incremental compilation wherein a previous result for a query that
2174         // depends on the end location of a span will be incorrectly reused when the
2175         // end location of the span it depends on has changed (see issue #74890). A
2176         // similar analysis applies if some query depends specifically on the length
2177         // of the span, but we only hash the end location. So hash both.
2178 
2179         let col_lo_trunc = (col_lo.0 as u64) & 0xFF;
2180         let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8;
2181         let col_hi_trunc = (col_hi.0 as u64) & 0xFF << 32;
2182         let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40;
2183         let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc;
2184         let len = (span.hi - span.lo).0;
2185         Hash::hash(&col_line, hasher);
2186         Hash::hash(&len, hasher);
2187     }
2188 }
2189 
2190 /// Useful type to use with `Result<>` indicate that an error has already
2191 /// been reported to the user, so no need to continue checking.
2192 #[derive(Clone, Copy, Debug, Encodable, Decodable, Hash, PartialEq, Eq, PartialOrd, Ord)]
2193 #[derive(HashStable_Generic)]
2194 pub struct ErrorGuaranteed(());
2195 
2196 impl ErrorGuaranteed {
2197     /// To be used only if you really know what you are doing... ideally, we would find a way to
2198     /// eliminate all calls to this method.
2199     #[deprecated = "`Session::delay_span_bug` should be preferred over this function"]
unchecked_claim_error_was_emitted() -> Self2200     pub fn unchecked_claim_error_was_emitted() -> Self {
2201         ErrorGuaranteed(())
2202     }
2203 }
2204