1 //! Source positions and related helper functions.
2 //!
3 //! Important concepts in this module include:
4 //!
5 //! - the *span*, represented by [`SpanData`] and related types;
6 //! - source code as represented by a [`SourceMap`]; and
7 //! - interned strings, represented by [`Symbol`]s, with some common symbols available statically in the [`sym`] module.
8 //!
9 //! Unlike most compilers, the span contains not only the position in the source code, but also various other metadata,
10 //! such as the edition and macro hygiene. This metadata is stored in [`SyntaxContext`] and [`ExpnData`].
11 //!
12 //! ## Note
13 //!
14 //! This API is completely unstable and subject to change.
15
16 #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
17 #![feature(array_windows)]
18 #![feature(if_let_guard)]
19 #![feature(negative_impls)]
20 #![feature(min_specialization)]
21 #![feature(rustc_attrs)]
22 #![feature(let_chains)]
23 #![feature(round_char_boundary)]
24 #![deny(rustc::untranslatable_diagnostic)]
25 #![deny(rustc::diagnostic_outside_of_impl)]
26
27 #[macro_use]
28 extern crate rustc_macros;
29
30 #[macro_use]
31 extern crate tracing;
32
33 use rustc_data_structures::AtomicRef;
34 use rustc_macros::HashStable_Generic;
35 use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
36
37 mod caching_source_map_view;
38 pub mod source_map;
39 pub use self::caching_source_map_view::CachingSourceMapView;
40 use source_map::SourceMap;
41
42 pub mod edition;
43 use edition::Edition;
44 pub mod hygiene;
45 use hygiene::Transparency;
46 pub use hygiene::{DesugaringKind, ExpnKind, MacroKind};
47 pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext};
48 use rustc_data_structures::stable_hasher::HashingControls;
49 pub mod def_id;
50 use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE};
51 pub mod edit_distance;
52 mod span_encoding;
53 pub use span_encoding::{Span, DUMMY_SP};
54
55 pub mod symbol;
56 pub use symbol::{sym, Symbol};
57
58 mod analyze_source_file;
59 pub mod fatal_error;
60
61 pub mod profiling;
62
63 use rustc_data_structures::stable_hasher::{Hash128, Hash64, HashStable, StableHasher};
64 use rustc_data_structures::sync::{Lock, Lrc};
65
66 use std::borrow::Cow;
67 use std::cmp::{self, Ordering};
68 use std::hash::Hash;
69 use std::ops::{Add, Range, Sub};
70 use std::path::{Path, PathBuf};
71 use std::str::FromStr;
72 use std::{fmt, iter};
73
74 use md5::Digest;
75 use md5::Md5;
76 use sha1::Sha1;
77 use sha2::Sha256;
78
79 #[cfg(test)]
80 mod tests;
81
/// Per-session global variables: this struct is stored in thread-local storage
/// in such a way that it is accessible without any kind of handle to all
/// threads within the compilation session, but is not accessible outside the
/// session.
///
/// Values are built with [`SessionGlobals::new`] and installed into the scoped
/// `SESSION_GLOBALS` thread-local by the `*_then` helper functions in this module.
pub struct SessionGlobals {
    /// Symbol interner; created with `symbol::Interner::fresh()`.
    symbol_interner: symbol::Interner,
    /// Span interner, wrapped in a [`Lock`]; starts out default-initialized.
    span_interner: Lock<span_encoding::SpanInterner>,
    /// Hygiene data, wrapped in a [`Lock`]; created from the edition passed to `new`.
    hygiene_data: Lock<hygiene::HygieneData>,

    /// A reference to the source map in the `Session`. It's an `Option`
    /// because it can't be initialized until `Session` is created, which
    /// happens after `SessionGlobals`. `set_source_map` does the
    /// initialization.
    ///
    /// This field should only be used in places where the `Session` is truly
    /// not available, such as `<Span as Debug>::fmt`.
    source_map: Lock<Option<Lrc<SourceMap>>>,
}
100
101 impl SessionGlobals {
new(edition: Edition) -> SessionGlobals102 pub fn new(edition: Edition) -> SessionGlobals {
103 SessionGlobals {
104 symbol_interner: symbol::Interner::fresh(),
105 span_interner: Lock::new(span_encoding::SpanInterner::default()),
106 hygiene_data: Lock::new(hygiene::HygieneData::new(edition)),
107 source_map: Lock::new(None),
108 }
109 }
110 }
111
112 #[inline]
create_session_globals_then<R>(edition: Edition, f: impl FnOnce() -> R) -> R113 pub fn create_session_globals_then<R>(edition: Edition, f: impl FnOnce() -> R) -> R {
114 assert!(
115 !SESSION_GLOBALS.is_set(),
116 "SESSION_GLOBALS should never be overwritten! \
117 Use another thread if you need another SessionGlobals"
118 );
119 let session_globals = SessionGlobals::new(edition);
120 SESSION_GLOBALS.set(&session_globals, f)
121 }
122
123 #[inline]
set_session_globals_then<R>(session_globals: &SessionGlobals, f: impl FnOnce() -> R) -> R124 pub fn set_session_globals_then<R>(session_globals: &SessionGlobals, f: impl FnOnce() -> R) -> R {
125 assert!(
126 !SESSION_GLOBALS.is_set(),
127 "SESSION_GLOBALS should never be overwritten! \
128 Use another thread if you need another SessionGlobals"
129 );
130 SESSION_GLOBALS.set(session_globals, f)
131 }
132
133 #[inline]
create_default_session_if_not_set_then<R, F>(f: F) -> R where F: FnOnce(&SessionGlobals) -> R,134 pub fn create_default_session_if_not_set_then<R, F>(f: F) -> R
135 where
136 F: FnOnce(&SessionGlobals) -> R,
137 {
138 create_session_if_not_set_then(edition::DEFAULT_EDITION, f)
139 }
140
141 #[inline]
create_session_if_not_set_then<R, F>(edition: Edition, f: F) -> R where F: FnOnce(&SessionGlobals) -> R,142 pub fn create_session_if_not_set_then<R, F>(edition: Edition, f: F) -> R
143 where
144 F: FnOnce(&SessionGlobals) -> R,
145 {
146 if !SESSION_GLOBALS.is_set() {
147 let session_globals = SessionGlobals::new(edition);
148 SESSION_GLOBALS.set(&session_globals, || SESSION_GLOBALS.with(f))
149 } else {
150 SESSION_GLOBALS.with(f)
151 }
152 }
153
154 #[inline]
with_session_globals<R, F>(f: F) -> R where F: FnOnce(&SessionGlobals) -> R,155 pub fn with_session_globals<R, F>(f: F) -> R
156 where
157 F: FnOnce(&SessionGlobals) -> R,
158 {
159 SESSION_GLOBALS.with(f)
160 }
161
162 #[inline]
create_default_session_globals_then<R>(f: impl FnOnce() -> R) -> R163 pub fn create_default_session_globals_then<R>(f: impl FnOnce() -> R) -> R {
164 create_session_globals_then(edition::DEFAULT_EDITION, f)
165 }
166
// Scoped thread-local slot holding the `SessionGlobals` of the current session.
//
// If this ever becomes non-thread-local, `decode_syntax_context`
// and `decode_expn_id` will need to be updated to handle concurrent
// deserialization.
scoped_tls::scoped_thread_local!(static SESSION_GLOBALS: SessionGlobals);
171
/// The name of a real file, either as a plain local path or as a remapped path.
// FIXME: We should use this enum or something like it to get rid of the
// use of magic `/rust/1.x/...` paths across the board.
#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd)]
#[derive(Decodable)]
pub enum RealFileName {
    /// A path that has not been remapped.
    LocalPath(PathBuf),
    /// For remapped paths (namely paths into libstd that have been mapped
    /// to the appropriate spot on the local host's file system, and local file
    /// system paths that have been remapped with `FilePathMapping`).
    Remapped {
        /// `local_path` is the (host-dependent) local path to the file. This is
        /// `None` if the file was imported from another crate.
        local_path: Option<PathBuf>,
        /// `virtual_name` is the stable path rustc will store internally within
        /// build artifacts.
        virtual_name: PathBuf,
    },
}
190
191 impl Hash for RealFileName {
hash<H: std::hash::Hasher>(&self, state: &mut H)192 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
193 // To prevent #70924 from happening again we should only hash the
194 // remapped (virtualized) path if that exists. This is because
195 // virtualized paths to sysroot crates (/rust/$hash or /rust/$version)
196 // remain stable even if the corresponding local_path changes
197 self.remapped_path_if_available().hash(state)
198 }
199 }
200
201 // This is functionally identical to #[derive(Encodable)], with the exception of
202 // an added assert statement
203 impl<S: Encoder> Encodable<S> for RealFileName {
encode(&self, encoder: &mut S)204 fn encode(&self, encoder: &mut S) {
205 match *self {
206 RealFileName::LocalPath(ref local_path) => encoder.emit_enum_variant(0, |encoder| {
207 local_path.encode(encoder);
208 }),
209
210 RealFileName::Remapped { ref local_path, ref virtual_name } => encoder
211 .emit_enum_variant(1, |encoder| {
212 // For privacy and build reproducibility, we must not embed host-dependant path in artifacts
213 // if they have been remapped by --remap-path-prefix
214 assert!(local_path.is_none());
215 local_path.encode(encoder);
216 virtual_name.encode(encoder);
217 }),
218 }
219 }
220 }
221
222 impl RealFileName {
223 /// Returns the path suitable for reading from the file system on the local host,
224 /// if this information exists.
225 /// Avoid embedding this in build artifacts; see `remapped_path_if_available()` for that.
local_path(&self) -> Option<&Path>226 pub fn local_path(&self) -> Option<&Path> {
227 match self {
228 RealFileName::LocalPath(p) => Some(p),
229 RealFileName::Remapped { local_path, virtual_name: _ } => local_path.as_deref(),
230 }
231 }
232
233 /// Returns the path suitable for reading from the file system on the local host,
234 /// if this information exists.
235 /// Avoid embedding this in build artifacts; see `remapped_path_if_available()` for that.
into_local_path(self) -> Option<PathBuf>236 pub fn into_local_path(self) -> Option<PathBuf> {
237 match self {
238 RealFileName::LocalPath(p) => Some(p),
239 RealFileName::Remapped { local_path: p, virtual_name: _ } => p,
240 }
241 }
242
243 /// Returns the path suitable for embedding into build artifacts. This would still
244 /// be a local path if it has not been remapped. A remapped path will not correspond
245 /// to a valid file system path: see `local_path_if_available()` for something that
246 /// is more likely to return paths into the local host file system.
remapped_path_if_available(&self) -> &Path247 pub fn remapped_path_if_available(&self) -> &Path {
248 match self {
249 RealFileName::LocalPath(p)
250 | RealFileName::Remapped { local_path: _, virtual_name: p } => p,
251 }
252 }
253
254 /// Returns the path suitable for reading from the file system on the local host,
255 /// if this information exists. Otherwise returns the remapped name.
256 /// Avoid embedding this in build artifacts; see `remapped_path_if_available()` for that.
local_path_if_available(&self) -> &Path257 pub fn local_path_if_available(&self) -> &Path {
258 match self {
259 RealFileName::LocalPath(path)
260 | RealFileName::Remapped { local_path: None, virtual_name: path }
261 | RealFileName::Remapped { local_path: Some(path), virtual_name: _ } => path,
262 }
263 }
264
to_string_lossy(&self, display_pref: FileNameDisplayPreference) -> Cow<'_, str>265 pub fn to_string_lossy(&self, display_pref: FileNameDisplayPreference) -> Cow<'_, str> {
266 match display_pref {
267 FileNameDisplayPreference::Local => self.local_path_if_available().to_string_lossy(),
268 FileNameDisplayPreference::Remapped => {
269 self.remapped_path_if_available().to_string_lossy()
270 }
271 FileNameDisplayPreference::Short => self
272 .local_path_if_available()
273 .file_name()
274 .map_or_else(|| "".into(), |f| f.to_string_lossy()),
275 }
276 }
277 }
278
/// Differentiates between real files and common virtual files.
///
/// The `Hash64` payloads are produced by the `*_source_code` constructors on
/// `FileName`, which hash the source text they are given.
#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Hash)]
#[derive(Decodable, Encodable)]
pub enum FileName {
    /// A real file; the only variant for which `is_real()` returns `true`.
    Real(RealFileName),
    /// Call to `quote!`.
    QuoteExpansion(Hash64),
    /// Command line.
    Anon(Hash64),
    /// Hack in `src/librustc_ast/parse.rs`.
    // FIXME(jseyfried)
    MacroExpansion(Hash64),
    /// Displayed as `<proc-macro source code>`.
    ProcMacroSourceCode(Hash64),
    /// Strings provided as `--cfg [cfgspec]` stored in a `crate_cfg`.
    CfgSpec(Hash64),
    /// Strings provided as crate attributes in the CLI.
    CliCrateAttr(Hash64),
    /// Custom sources for explicit parser calls from plugins and drivers.
    Custom(String),
    /// A doctest, identified by a path and a line (see `doc_test_source_code`).
    DocTest(PathBuf, isize),
    /// Post-substitution inline assembly from LLVM.
    InlineAsm(Hash64),
}
302
303 impl From<PathBuf> for FileName {
from(p: PathBuf) -> Self304 fn from(p: PathBuf) -> Self {
305 assert!(!p.to_string_lossy().ends_with('>'));
306 FileName::Real(RealFileName::LocalPath(p))
307 }
308 }
309
/// Selects which form of a file name is used when it is displayed.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum FileNameDisplayPreference {
    /// Display the path after the application of rewrite rules provided via `--remap-path-prefix`.
    /// This is appropriate for paths that get embedded into files produced by the compiler.
    Remapped,
    /// Display the path before the application of rewrite rules provided via `--remap-path-prefix`.
    /// This is appropriate for use in user-facing output (such as diagnostics).
    Local,
    /// Display only the filename, as a way to reduce the verbosity of the output.
    /// This is appropriate for use in user-facing output (such as diagnostics).
    Short,
}
322
/// Display adapter pairing a borrowed [`FileName`] with the preference used to render it.
///
/// Created by `FileName::prefer_remapped`, `FileName::prefer_local` and `FileName::display`.
pub struct FileNameDisplay<'a> {
    /// The file name being displayed.
    inner: &'a FileName,
    /// How `FileName::Real` names are rendered.
    display_pref: FileNameDisplayPreference,
}
327
328 impl fmt::Display for FileNameDisplay<'_> {
fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result329 fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
330 use FileName::*;
331 match *self.inner {
332 Real(ref name) => {
333 write!(fmt, "{}", name.to_string_lossy(self.display_pref))
334 }
335 QuoteExpansion(_) => write!(fmt, "<quote expansion>"),
336 MacroExpansion(_) => write!(fmt, "<macro expansion>"),
337 Anon(_) => write!(fmt, "<anon>"),
338 ProcMacroSourceCode(_) => write!(fmt, "<proc-macro source code>"),
339 CfgSpec(_) => write!(fmt, "<cfgspec>"),
340 CliCrateAttr(_) => write!(fmt, "<crate attribute>"),
341 Custom(ref s) => write!(fmt, "<{s}>"),
342 DocTest(ref path, _) => write!(fmt, "{}", path.display()),
343 InlineAsm(_) => write!(fmt, "<inline asm>"),
344 }
345 }
346 }
347
348 impl<'a> FileNameDisplay<'a> {
to_string_lossy(&self) -> Cow<'a, str>349 pub fn to_string_lossy(&self) -> Cow<'a, str> {
350 match self.inner {
351 FileName::Real(ref inner) => inner.to_string_lossy(self.display_pref),
352 _ => Cow::from(self.to_string()),
353 }
354 }
355 }
356
357 impl FileName {
is_real(&self) -> bool358 pub fn is_real(&self) -> bool {
359 use FileName::*;
360 match *self {
361 Real(_) => true,
362 Anon(_)
363 | MacroExpansion(_)
364 | ProcMacroSourceCode(_)
365 | CfgSpec(_)
366 | CliCrateAttr(_)
367 | Custom(_)
368 | QuoteExpansion(_)
369 | DocTest(_, _)
370 | InlineAsm(_) => false,
371 }
372 }
373
prefer_remapped(&self) -> FileNameDisplay<'_>374 pub fn prefer_remapped(&self) -> FileNameDisplay<'_> {
375 FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Remapped }
376 }
377
378 /// This may include transient local filesystem information.
379 /// Must not be embedded in build outputs.
prefer_local(&self) -> FileNameDisplay<'_>380 pub fn prefer_local(&self) -> FileNameDisplay<'_> {
381 FileNameDisplay { inner: self, display_pref: FileNameDisplayPreference::Local }
382 }
383
display(&self, display_pref: FileNameDisplayPreference) -> FileNameDisplay<'_>384 pub fn display(&self, display_pref: FileNameDisplayPreference) -> FileNameDisplay<'_> {
385 FileNameDisplay { inner: self, display_pref }
386 }
387
macro_expansion_source_code(src: &str) -> FileName388 pub fn macro_expansion_source_code(src: &str) -> FileName {
389 let mut hasher = StableHasher::new();
390 src.hash(&mut hasher);
391 FileName::MacroExpansion(hasher.finish())
392 }
393
anon_source_code(src: &str) -> FileName394 pub fn anon_source_code(src: &str) -> FileName {
395 let mut hasher = StableHasher::new();
396 src.hash(&mut hasher);
397 FileName::Anon(hasher.finish())
398 }
399
proc_macro_source_code(src: &str) -> FileName400 pub fn proc_macro_source_code(src: &str) -> FileName {
401 let mut hasher = StableHasher::new();
402 src.hash(&mut hasher);
403 FileName::ProcMacroSourceCode(hasher.finish())
404 }
405
cfg_spec_source_code(src: &str) -> FileName406 pub fn cfg_spec_source_code(src: &str) -> FileName {
407 let mut hasher = StableHasher::new();
408 src.hash(&mut hasher);
409 FileName::QuoteExpansion(hasher.finish())
410 }
411
cli_crate_attr_source_code(src: &str) -> FileName412 pub fn cli_crate_attr_source_code(src: &str) -> FileName {
413 let mut hasher = StableHasher::new();
414 src.hash(&mut hasher);
415 FileName::CliCrateAttr(hasher.finish())
416 }
417
doc_test_source_code(path: PathBuf, line: isize) -> FileName418 pub fn doc_test_source_code(path: PathBuf, line: isize) -> FileName {
419 FileName::DocTest(path, line)
420 }
421
inline_asm_source_code(src: &str) -> FileName422 pub fn inline_asm_source_code(src: &str) -> FileName {
423 let mut hasher = StableHasher::new();
424 src.hash(&mut hasher);
425 FileName::InlineAsm(hasher.finish())
426 }
427 }
428
/// Represents a span.
///
/// Spans represent a region of code, used for error reporting. Positions in spans
/// are *absolute* positions from the beginning of the [`SourceMap`], not positions
/// relative to [`SourceFile`]s. Methods on the `SourceMap` can be used to relate spans back
/// to the original source.
///
/// You must be careful if the span crosses more than one file, since you will not be
/// able to use many of the functions on spans in source_map and you cannot assume
/// that the length of the span is equal to `span.hi - span.lo`; there may be space in the
/// [`BytePos`] range between files.
///
/// `SpanData` is public because `Span` uses a thread-local interner and can't be
/// sent to other threads, but some pieces of performance infra run in a separate thread.
/// Using `Span` is generally preferred.
#[derive(Clone, Copy, Hash, PartialEq, Eq)]
pub struct SpanData {
    /// Lower position bound of the span.
    pub lo: BytePos,
    /// Upper position bound of the span; a span with `hi == lo` is empty.
    pub hi: BytePos,
    /// Information about where the macro came from, if this piece of
    /// code was created by a macro expansion.
    pub ctxt: SyntaxContext,
    /// Optional parent definition; not taken into account when ordering spans.
    // NOTE(review): exact meaning of `parent` is not visible in this chunk — confirm.
    pub parent: Option<LocalDefId>,
}
453
454 // Order spans by position in the file.
455 impl Ord for SpanData {
cmp(&self, other: &Self) -> Ordering456 fn cmp(&self, other: &Self) -> Ordering {
457 let SpanData {
458 lo: s_lo,
459 hi: s_hi,
460 ctxt: s_ctxt,
461 // `LocalDefId` does not implement `Ord`.
462 // The other fields are enough to determine in-file order.
463 parent: _,
464 } = self;
465 let SpanData {
466 lo: o_lo,
467 hi: o_hi,
468 ctxt: o_ctxt,
469 // `LocalDefId` does not implement `Ord`.
470 // The other fields are enough to determine in-file order.
471 parent: _,
472 } = other;
473
474 (s_lo, s_hi, s_ctxt).cmp(&(o_lo, o_hi, o_ctxt))
475 }
476 }
477
478 impl PartialOrd for SpanData {
partial_cmp(&self, other: &Self) -> Option<Ordering>479 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
480 Some(self.cmp(other))
481 }
482 }
483
484 impl SpanData {
485 #[inline]
span(&self) -> Span486 pub fn span(&self) -> Span {
487 Span::new(self.lo, self.hi, self.ctxt, self.parent)
488 }
489 #[inline]
with_lo(&self, lo: BytePos) -> Span490 pub fn with_lo(&self, lo: BytePos) -> Span {
491 Span::new(lo, self.hi, self.ctxt, self.parent)
492 }
493 #[inline]
with_hi(&self, hi: BytePos) -> Span494 pub fn with_hi(&self, hi: BytePos) -> Span {
495 Span::new(self.lo, hi, self.ctxt, self.parent)
496 }
497 #[inline]
with_ctxt(&self, ctxt: SyntaxContext) -> Span498 pub fn with_ctxt(&self, ctxt: SyntaxContext) -> Span {
499 Span::new(self.lo, self.hi, ctxt, self.parent)
500 }
501 #[inline]
with_parent(&self, parent: Option<LocalDefId>) -> Span502 pub fn with_parent(&self, parent: Option<LocalDefId>) -> Span {
503 Span::new(self.lo, self.hi, self.ctxt, parent)
504 }
505 /// Returns `true` if this is a dummy span with any hygienic context.
506 #[inline]
is_dummy(self) -> bool507 pub fn is_dummy(self) -> bool {
508 self.lo.0 == 0 && self.hi.0 == 0
509 }
510 #[inline]
is_visible(self, sm: &SourceMap) -> bool511 pub fn is_visible(self, sm: &SourceMap) -> bool {
512 !self.is_dummy() && sm.is_span_accessible(self.span())
513 }
514 /// Returns `true` if `self` fully encloses `other`.
contains(self, other: Self) -> bool515 pub fn contains(self, other: Self) -> bool {
516 self.lo <= other.lo && other.hi <= self.hi
517 }
518 }
519
// The interner is pointed to by a thread-local value which is only set on the main thread
// when parallelization is disabled. So we don't allow `Span` to transfer between threads
// to avoid panics and other errors, even though it would be memory safe to do so.
#[cfg(not(parallel_compiler))]
impl !Send for Span {}
#[cfg(not(parallel_compiler))]
impl !Sync for Span {}
527
528 impl PartialOrd for Span {
partial_cmp(&self, rhs: &Self) -> Option<Ordering>529 fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
530 PartialOrd::partial_cmp(&self.data(), &rhs.data())
531 }
532 }
533 impl Ord for Span {
cmp(&self, rhs: &Self) -> Ordering534 fn cmp(&self, rhs: &Self) -> Ordering {
535 Ord::cmp(&self.data(), &rhs.data())
536 }
537 }
538
539 impl Span {
540 #[inline]
lo(self) -> BytePos541 pub fn lo(self) -> BytePos {
542 self.data().lo
543 }
544 #[inline]
with_lo(self, lo: BytePos) -> Span545 pub fn with_lo(self, lo: BytePos) -> Span {
546 self.data().with_lo(lo)
547 }
548 #[inline]
hi(self) -> BytePos549 pub fn hi(self) -> BytePos {
550 self.data().hi
551 }
552 #[inline]
with_hi(self, hi: BytePos) -> Span553 pub fn with_hi(self, hi: BytePos) -> Span {
554 self.data().with_hi(hi)
555 }
556 #[inline]
eq_ctxt(self, other: Span) -> bool557 pub fn eq_ctxt(self, other: Span) -> bool {
558 self.data_untracked().ctxt == other.data_untracked().ctxt
559 }
560 #[inline]
with_ctxt(self, ctxt: SyntaxContext) -> Span561 pub fn with_ctxt(self, ctxt: SyntaxContext) -> Span {
562 self.data_untracked().with_ctxt(ctxt)
563 }
564 #[inline]
parent(self) -> Option<LocalDefId>565 pub fn parent(self) -> Option<LocalDefId> {
566 self.data().parent
567 }
568 #[inline]
with_parent(self, ctxt: Option<LocalDefId>) -> Span569 pub fn with_parent(self, ctxt: Option<LocalDefId>) -> Span {
570 self.data().with_parent(ctxt)
571 }
572
573 /// Returns `true` if this is a dummy span with any hygienic context.
574 #[inline]
is_dummy(self) -> bool575 pub fn is_dummy(self) -> bool {
576 self.data_untracked().is_dummy()
577 }
578
579 #[inline]
is_visible(self, sm: &SourceMap) -> bool580 pub fn is_visible(self, sm: &SourceMap) -> bool {
581 self.data_untracked().is_visible(sm)
582 }
583
584 /// Returns `true` if this span comes from any kind of macro, desugaring or inlining.
585 #[inline]
from_expansion(self) -> bool586 pub fn from_expansion(self) -> bool {
587 self.ctxt() != SyntaxContext::root()
588 }
589
590 /// Returns `true` if `span` originates in a macro's expansion where debuginfo should be
591 /// collapsed.
in_macro_expansion_with_collapse_debuginfo(self) -> bool592 pub fn in_macro_expansion_with_collapse_debuginfo(self) -> bool {
593 let outer_expn = self.ctxt().outer_expn_data();
594 matches!(outer_expn.kind, ExpnKind::Macro(..)) && outer_expn.collapse_debuginfo
595 }
596
597 /// Returns `true` if `span` originates in a derive-macro's expansion.
in_derive_expansion(self) -> bool598 pub fn in_derive_expansion(self) -> bool {
599 matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _))
600 }
601
602 /// Gate suggestions that would not be appropriate in a context the user didn't write.
can_be_used_for_suggestions(self) -> bool603 pub fn can_be_used_for_suggestions(self) -> bool {
604 !self.from_expansion()
605 // FIXME: If this span comes from a `derive` macro but it points at code the user wrote,
606 // the callsite span and the span will be pointing at different places. It also means that
607 // we can safely provide suggestions on this span.
608 || (matches!(self.ctxt().outer_expn_data().kind, ExpnKind::Macro(MacroKind::Derive, _))
609 && self.parent_callsite().map(|p| (p.lo(), p.hi())) != Some((self.lo(), self.hi())))
610 }
611
612 #[inline]
with_root_ctxt(lo: BytePos, hi: BytePos) -> Span613 pub fn with_root_ctxt(lo: BytePos, hi: BytePos) -> Span {
614 Span::new(lo, hi, SyntaxContext::root(), None)
615 }
616
617 /// Returns a new span representing an empty span at the beginning of this span.
618 #[inline]
shrink_to_lo(self) -> Span619 pub fn shrink_to_lo(self) -> Span {
620 let span = self.data_untracked();
621 span.with_hi(span.lo)
622 }
623 /// Returns a new span representing an empty span at the end of this span.
624 #[inline]
shrink_to_hi(self) -> Span625 pub fn shrink_to_hi(self) -> Span {
626 let span = self.data_untracked();
627 span.with_lo(span.hi)
628 }
629
630 #[inline]
631 /// Returns `true` if `hi == lo`.
is_empty(self) -> bool632 pub fn is_empty(self) -> bool {
633 let span = self.data_untracked();
634 span.hi == span.lo
635 }
636
637 /// Returns `self` if `self` is not the dummy span, and `other` otherwise.
substitute_dummy(self, other: Span) -> Span638 pub fn substitute_dummy(self, other: Span) -> Span {
639 if self.is_dummy() { other } else { self }
640 }
641
642 /// Returns `true` if `self` fully encloses `other`.
contains(self, other: Span) -> bool643 pub fn contains(self, other: Span) -> bool {
644 let span = self.data();
645 let other = other.data();
646 span.contains(other)
647 }
648
649 /// Returns `true` if `self` touches `other`.
overlaps(self, other: Span) -> bool650 pub fn overlaps(self, other: Span) -> bool {
651 let span = self.data();
652 let other = other.data();
653 span.lo < other.hi && other.lo < span.hi
654 }
655
656 /// Returns `true` if the spans are equal with regards to the source text.
657 ///
658 /// Use this instead of `==` when either span could be generated code,
659 /// and you only care that they point to the same bytes of source text.
source_equal(self, other: Span) -> bool660 pub fn source_equal(self, other: Span) -> bool {
661 let span = self.data();
662 let other = other.data();
663 span.lo == other.lo && span.hi == other.hi
664 }
665
666 /// Returns `Some(span)`, where the start is trimmed by the end of `other`.
trim_start(self, other: Span) -> Option<Span>667 pub fn trim_start(self, other: Span) -> Option<Span> {
668 let span = self.data();
669 let other = other.data();
670 if span.hi > other.hi { Some(span.with_lo(cmp::max(span.lo, other.hi))) } else { None }
671 }
672
673 /// Returns the source span -- this is either the supplied span, or the span for
674 /// the macro callsite that expanded to it.
source_callsite(self) -> Span675 pub fn source_callsite(self) -> Span {
676 let expn_data = self.ctxt().outer_expn_data();
677 if !expn_data.is_root() { expn_data.call_site.source_callsite() } else { self }
678 }
679
680 /// The `Span` for the tokens in the previous macro expansion from which `self` was generated,
681 /// if any.
parent_callsite(self) -> Option<Span>682 pub fn parent_callsite(self) -> Option<Span> {
683 let expn_data = self.ctxt().outer_expn_data();
684 if !expn_data.is_root() { Some(expn_data.call_site) } else { None }
685 }
686
687 /// Walk down the expansion ancestors to find a span that's contained within `outer`.
find_ancestor_inside(mut self, outer: Span) -> Option<Span>688 pub fn find_ancestor_inside(mut self, outer: Span) -> Option<Span> {
689 while !outer.contains(self) {
690 self = self.parent_callsite()?;
691 }
692 Some(self)
693 }
694
695 /// Like `find_ancestor_inside`, but specifically for when spans might not
696 /// overlaps. Take care when using this, and prefer `find_ancestor_inside`
697 /// when you know that the spans are nested (modulo macro expansion).
find_ancestor_in_same_ctxt(mut self, other: Span) -> Option<Span>698 pub fn find_ancestor_in_same_ctxt(mut self, other: Span) -> Option<Span> {
699 while !Span::eq_ctxt(self, other) {
700 self = self.parent_callsite()?;
701 }
702 Some(self)
703 }
704
705 /// Edition of the crate from which this span came.
edition(self) -> edition::Edition706 pub fn edition(self) -> edition::Edition {
707 self.ctxt().edition()
708 }
709
710 #[inline]
is_rust_2015(self) -> bool711 pub fn is_rust_2015(self) -> bool {
712 self.edition().is_rust_2015()
713 }
714
715 #[inline]
rust_2018(self) -> bool716 pub fn rust_2018(self) -> bool {
717 self.edition().rust_2018()
718 }
719
720 #[inline]
rust_2021(self) -> bool721 pub fn rust_2021(self) -> bool {
722 self.edition().rust_2021()
723 }
724
725 #[inline]
rust_2024(self) -> bool726 pub fn rust_2024(self) -> bool {
727 self.edition().rust_2024()
728 }
729
730 /// Returns the source callee.
731 ///
732 /// Returns `None` if the supplied span has no expansion trace,
733 /// else returns the `ExpnData` for the macro definition
734 /// corresponding to the source callsite.
source_callee(self) -> Option<ExpnData>735 pub fn source_callee(self) -> Option<ExpnData> {
736 let expn_data = self.ctxt().outer_expn_data();
737
738 // Create an iterator of call site expansions
739 iter::successors(Some(expn_data), |expn_data| {
740 Some(expn_data.call_site.ctxt().outer_expn_data())
741 })
742 // Find the last expansion which is not root
743 .take_while(|expn_data| !expn_data.is_root())
744 .last()
745 }
746
747 /// Checks if a span is "internal" to a macro in which `#[unstable]`
748 /// items can be used (that is, a macro marked with
749 /// `#[allow_internal_unstable]`).
allows_unstable(self, feature: Symbol) -> bool750 pub fn allows_unstable(self, feature: Symbol) -> bool {
751 self.ctxt()
752 .outer_expn_data()
753 .allow_internal_unstable
754 .is_some_and(|features| features.iter().any(|&f| f == feature))
755 }
756
757 /// Checks if this span arises from a compiler desugaring of kind `kind`.
is_desugaring(self, kind: DesugaringKind) -> bool758 pub fn is_desugaring(self, kind: DesugaringKind) -> bool {
759 match self.ctxt().outer_expn_data().kind {
760 ExpnKind::Desugaring(k) => k == kind,
761 _ => false,
762 }
763 }
764
765 /// Returns the compiler desugaring that created this span, or `None`
766 /// if this span is not from a desugaring.
desugaring_kind(self) -> Option<DesugaringKind>767 pub fn desugaring_kind(self) -> Option<DesugaringKind> {
768 match self.ctxt().outer_expn_data().kind {
769 ExpnKind::Desugaring(k) => Some(k),
770 _ => None,
771 }
772 }
773
774 /// Checks if a span is "internal" to a macro in which `unsafe`
775 /// can be used without triggering the `unsafe_code` lint.
776 /// (that is, a macro marked with `#[allow_internal_unsafe]`).
allows_unsafe(self) -> bool777 pub fn allows_unsafe(self) -> bool {
778 self.ctxt().outer_expn_data().allow_internal_unsafe
779 }
780
macro_backtrace(mut self) -> impl Iterator<Item = ExpnData>781 pub fn macro_backtrace(mut self) -> impl Iterator<Item = ExpnData> {
782 let mut prev_span = DUMMY_SP;
783 iter::from_fn(move || {
784 loop {
785 let expn_data = self.ctxt().outer_expn_data();
786 if expn_data.is_root() {
787 return None;
788 }
789
790 let is_recursive = expn_data.call_site.source_equal(prev_span);
791
792 prev_span = self;
793 self = expn_data.call_site;
794
795 // Don't print recursive invocations.
796 if !is_recursive {
797 return Some(expn_data);
798 }
799 }
800 })
801 }
802
803 /// Splits a span into two composite spans around a certain position.
split_at(self, pos: u32) -> (Span, Span)804 pub fn split_at(self, pos: u32) -> (Span, Span) {
805 let len = self.hi().0 - self.lo().0;
806 debug_assert!(pos <= len);
807
808 let split_pos = BytePos(self.lo().0 + pos);
809 (
810 Span::new(self.lo(), split_pos, self.ctxt(), self.parent()),
811 Span::new(split_pos, self.hi(), self.ctxt(), self.parent()),
812 )
813 }
814
815 /// Returns a `Span` that would enclose both `self` and `end`.
816 ///
817 /// Note that this can also be used to extend the span "backwards":
818 /// `start.to(end)` and `end.to(start)` return the same `Span`.
819 ///
820 /// ```text
821 /// ____ ___
822 /// self lorem ipsum end
823 /// ^^^^^^^^^^^^^^^^^^^^
824 /// ```
to(self, end: Span) -> Span825 pub fn to(self, end: Span) -> Span {
826 let span_data = self.data();
827 let end_data = end.data();
828 // FIXME(jseyfried): `self.ctxt` should always equal `end.ctxt` here (cf. issue #23480).
829 // Return the macro span on its own to avoid weird diagnostic output. It is preferable to
830 // have an incomplete span than a completely nonsensical one.
831 if span_data.ctxt != end_data.ctxt {
832 if span_data.ctxt.is_root() {
833 return end;
834 } else if end_data.ctxt.is_root() {
835 return self;
836 }
837 // Both spans fall within a macro.
838 // FIXME(estebank): check if it is the *same* macro.
839 }
840 Span::new(
841 cmp::min(span_data.lo, end_data.lo),
842 cmp::max(span_data.hi, end_data.hi),
843 if span_data.ctxt.is_root() { end_data.ctxt } else { span_data.ctxt },
844 if span_data.parent == end_data.parent { span_data.parent } else { None },
845 )
846 }
847
848 /// Returns a `Span` between the end of `self` to the beginning of `end`.
849 ///
850 /// ```text
851 /// ____ ___
852 /// self lorem ipsum end
853 /// ^^^^^^^^^^^^^
854 /// ```
between(self, end: Span) -> Span855 pub fn between(self, end: Span) -> Span {
856 let span = self.data();
857 let end = end.data();
858 Span::new(
859 span.hi,
860 end.lo,
861 if end.ctxt.is_root() { end.ctxt } else { span.ctxt },
862 if span.parent == end.parent { span.parent } else { None },
863 )
864 }
865
866 /// Returns a `Span` from the beginning of `self` until the beginning of `end`.
867 ///
868 /// ```text
869 /// ____ ___
870 /// self lorem ipsum end
871 /// ^^^^^^^^^^^^^^^^^
872 /// ```
until(self, end: Span) -> Span873 pub fn until(self, end: Span) -> Span {
874 // Most of this function's body is copied from `to`.
875 // We can't just do `self.to(end.shrink_to_lo())`,
876 // because to also does some magic where it uses min/max so
877 // it can handle overlapping spans. Some advanced mis-use of
878 // `until` with different ctxts makes this visible.
879 let span_data = self.data();
880 let end_data = end.data();
881 // FIXME(jseyfried): `self.ctxt` should always equal `end.ctxt` here (cf. issue #23480).
882 // Return the macro span on its own to avoid weird diagnostic output. It is preferable to
883 // have an incomplete span than a completely nonsensical one.
884 if span_data.ctxt != end_data.ctxt {
885 if span_data.ctxt.is_root() {
886 return end;
887 } else if end_data.ctxt.is_root() {
888 return self;
889 }
890 // Both spans fall within a macro.
891 // FIXME(estebank): check if it is the *same* macro.
892 }
893 Span::new(
894 span_data.lo,
895 end_data.lo,
896 if end_data.ctxt.is_root() { end_data.ctxt } else { span_data.ctxt },
897 if span_data.parent == end_data.parent { span_data.parent } else { None },
898 )
899 }
900
from_inner(self, inner: InnerSpan) -> Span901 pub fn from_inner(self, inner: InnerSpan) -> Span {
902 let span = self.data();
903 Span::new(
904 span.lo + BytePos::from_usize(inner.start),
905 span.lo + BytePos::from_usize(inner.end),
906 span.ctxt,
907 span.parent,
908 )
909 }
910
911 /// Equivalent of `Span::def_site` from the proc macro API,
912 /// except that the location is taken from the `self` span.
with_def_site_ctxt(self, expn_id: ExpnId) -> Span913 pub fn with_def_site_ctxt(self, expn_id: ExpnId) -> Span {
914 self.with_ctxt_from_mark(expn_id, Transparency::Opaque)
915 }
916
917 /// Equivalent of `Span::call_site` from the proc macro API,
918 /// except that the location is taken from the `self` span.
with_call_site_ctxt(self, expn_id: ExpnId) -> Span919 pub fn with_call_site_ctxt(self, expn_id: ExpnId) -> Span {
920 self.with_ctxt_from_mark(expn_id, Transparency::Transparent)
921 }
922
923 /// Equivalent of `Span::mixed_site` from the proc macro API,
924 /// except that the location is taken from the `self` span.
with_mixed_site_ctxt(self, expn_id: ExpnId) -> Span925 pub fn with_mixed_site_ctxt(self, expn_id: ExpnId) -> Span {
926 self.with_ctxt_from_mark(expn_id, Transparency::SemiTransparent)
927 }
928
929 /// Produces a span with the same location as `self` and context produced by a macro with the
930 /// given ID and transparency, assuming that macro was defined directly and not produced by
931 /// some other macro (which is the case for built-in and procedural macros).
with_ctxt_from_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span932 pub fn with_ctxt_from_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
933 self.with_ctxt(SyntaxContext::root().apply_mark(expn_id, transparency))
934 }
935
936 #[inline]
apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span937 pub fn apply_mark(self, expn_id: ExpnId, transparency: Transparency) -> Span {
938 let span = self.data();
939 span.with_ctxt(span.ctxt.apply_mark(expn_id, transparency))
940 }
941
942 #[inline]
remove_mark(&mut self) -> ExpnId943 pub fn remove_mark(&mut self) -> ExpnId {
944 let mut span = self.data();
945 let mark = span.ctxt.remove_mark();
946 *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
947 mark
948 }
949
950 #[inline]
adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId>951 pub fn adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
952 let mut span = self.data();
953 let mark = span.ctxt.adjust(expn_id);
954 *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
955 mark
956 }
957
958 #[inline]
normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId>959 pub fn normalize_to_macros_2_0_and_adjust(&mut self, expn_id: ExpnId) -> Option<ExpnId> {
960 let mut span = self.data();
961 let mark = span.ctxt.normalize_to_macros_2_0_and_adjust(expn_id);
962 *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
963 mark
964 }
965
966 #[inline]
glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>>967 pub fn glob_adjust(&mut self, expn_id: ExpnId, glob_span: Span) -> Option<Option<ExpnId>> {
968 let mut span = self.data();
969 let mark = span.ctxt.glob_adjust(expn_id, glob_span);
970 *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
971 mark
972 }
973
974 #[inline]
reverse_glob_adjust( &mut self, expn_id: ExpnId, glob_span: Span, ) -> Option<Option<ExpnId>>975 pub fn reverse_glob_adjust(
976 &mut self,
977 expn_id: ExpnId,
978 glob_span: Span,
979 ) -> Option<Option<ExpnId>> {
980 let mut span = self.data();
981 let mark = span.ctxt.reverse_glob_adjust(expn_id, glob_span);
982 *self = Span::new(span.lo, span.hi, span.ctxt, span.parent);
983 mark
984 }
985
986 #[inline]
normalize_to_macros_2_0(self) -> Span987 pub fn normalize_to_macros_2_0(self) -> Span {
988 let span = self.data();
989 span.with_ctxt(span.ctxt.normalize_to_macros_2_0())
990 }
991
992 #[inline]
normalize_to_macro_rules(self) -> Span993 pub fn normalize_to_macro_rules(self) -> Span {
994 let span = self.data();
995 span.with_ctxt(span.ctxt.normalize_to_macro_rules())
996 }
997 }
998
999 impl Default for Span {
default() -> Self1000 fn default() -> Self {
1001 DUMMY_SP
1002 }
1003 }
1004
1005 impl<E: Encoder> Encodable<E> for Span {
encode(&self, s: &mut E)1006 default fn encode(&self, s: &mut E) {
1007 let span = self.data();
1008 span.lo.encode(s);
1009 span.hi.encode(s);
1010 }
1011 }
1012 impl<D: Decoder> Decodable<D> for Span {
decode(s: &mut D) -> Span1013 default fn decode(s: &mut D) -> Span {
1014 let lo = Decodable::decode(s);
1015 let hi = Decodable::decode(s);
1016
1017 Span::new(lo, hi, SyntaxContext::root(), None)
1018 }
1019 }
1020
1021 /// Insert `source_map` into the session globals for the duration of the
1022 /// closure's execution.
set_source_map<T, F: FnOnce() -> T>(source_map: Lrc<SourceMap>, f: F) -> T1023 pub fn set_source_map<T, F: FnOnce() -> T>(source_map: Lrc<SourceMap>, f: F) -> T {
1024 with_session_globals(|session_globals| {
1025 *session_globals.source_map.borrow_mut() = Some(source_map);
1026 });
1027 struct ClearSourceMap;
1028 impl Drop for ClearSourceMap {
1029 fn drop(&mut self) {
1030 with_session_globals(|session_globals| {
1031 session_globals.source_map.borrow_mut().take();
1032 });
1033 }
1034 }
1035
1036 let _guard = ClearSourceMap;
1037 f()
1038 }
1039
1040 impl fmt::Debug for Span {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1041 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1042 // Use the global `SourceMap` to print the span. If that's not
1043 // available, fall back to printing the raw values.
1044
1045 fn fallback(span: Span, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1046 f.debug_struct("Span")
1047 .field("lo", &span.lo())
1048 .field("hi", &span.hi())
1049 .field("ctxt", &span.ctxt())
1050 .finish()
1051 }
1052
1053 if SESSION_GLOBALS.is_set() {
1054 with_session_globals(|session_globals| {
1055 if let Some(source_map) = &*session_globals.source_map.borrow() {
1056 write!(f, "{} ({:?})", source_map.span_to_diagnostic_string(*self), self.ctxt())
1057 } else {
1058 fallback(*self, f)
1059 }
1060 })
1061 } else {
1062 fallback(*self, f)
1063 }
1064 }
1065 }
1066
1067 impl fmt::Debug for SpanData {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1068 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1069 fmt::Debug::fmt(&Span::new(self.lo, self.hi, self.ctxt, self.parent), f)
1070 }
1071 }
1072
1073 /// Identifies an offset of a multi-byte character in a `SourceFile`.
1074 #[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
1075 pub struct MultiByteChar {
1076 /// The absolute offset of the character in the `SourceMap`.
1077 pub pos: BytePos,
1078 /// The number of bytes, `>= 2`.
1079 pub bytes: u8,
1080 }
1081
1082 /// Identifies an offset of a non-narrow character in a `SourceFile`.
1083 #[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
1084 pub enum NonNarrowChar {
1085 /// Represents a zero-width character.
1086 ZeroWidth(BytePos),
1087 /// Represents a wide (full-width) character.
1088 Wide(BytePos),
1089 /// Represents a tab character, represented visually with a width of 4 characters.
1090 Tab(BytePos),
1091 }
1092
1093 impl NonNarrowChar {
new(pos: BytePos, width: usize) -> Self1094 fn new(pos: BytePos, width: usize) -> Self {
1095 match width {
1096 0 => NonNarrowChar::ZeroWidth(pos),
1097 2 => NonNarrowChar::Wide(pos),
1098 4 => NonNarrowChar::Tab(pos),
1099 _ => panic!("width {width} given for non-narrow character"),
1100 }
1101 }
1102
1103 /// Returns the absolute offset of the character in the `SourceMap`.
pos(&self) -> BytePos1104 pub fn pos(&self) -> BytePos {
1105 match *self {
1106 NonNarrowChar::ZeroWidth(p) | NonNarrowChar::Wide(p) | NonNarrowChar::Tab(p) => p,
1107 }
1108 }
1109
1110 /// Returns the width of the character, 0 (zero-width) or 2 (wide).
width(&self) -> usize1111 pub fn width(&self) -> usize {
1112 match *self {
1113 NonNarrowChar::ZeroWidth(_) => 0,
1114 NonNarrowChar::Wide(_) => 2,
1115 NonNarrowChar::Tab(_) => 4,
1116 }
1117 }
1118 }
1119
1120 impl Add<BytePos> for NonNarrowChar {
1121 type Output = Self;
1122
add(self, rhs: BytePos) -> Self1123 fn add(self, rhs: BytePos) -> Self {
1124 match self {
1125 NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
1126 NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
1127 NonNarrowChar::Tab(pos) => NonNarrowChar::Tab(pos + rhs),
1128 }
1129 }
1130 }
1131
1132 impl Sub<BytePos> for NonNarrowChar {
1133 type Output = Self;
1134
sub(self, rhs: BytePos) -> Self1135 fn sub(self, rhs: BytePos) -> Self {
1136 match self {
1137 NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
1138 NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
1139 NonNarrowChar::Tab(pos) => NonNarrowChar::Tab(pos - rhs),
1140 }
1141 }
1142 }
1143
1144 /// Identifies an offset of a character that was normalized away from `SourceFile`.
1145 #[derive(Copy, Clone, Encodable, Decodable, Eq, PartialEq, Debug)]
1146 pub struct NormalizedPos {
1147 /// The absolute offset of the character in the `SourceMap`.
1148 pub pos: BytePos,
1149 /// The difference between original and normalized string at position.
1150 pub diff: u32,
1151 }
1152
1153 #[derive(PartialEq, Eq, Clone, Debug)]
1154 pub enum ExternalSource {
1155 /// No external source has to be loaded, since the `SourceFile` represents a local crate.
1156 Unneeded,
1157 Foreign {
1158 kind: ExternalSourceKind,
1159 /// Index of the file inside metadata.
1160 metadata_index: u32,
1161 },
1162 }
1163
1164 /// The state of the lazy external source loading mechanism of a `SourceFile`.
1165 #[derive(PartialEq, Eq, Clone, Debug)]
1166 pub enum ExternalSourceKind {
1167 /// The external source has been loaded already.
1168 Present(Lrc<String>),
1169 /// No attempt has been made to load the external source.
1170 AbsentOk,
1171 /// A failed attempt has been made to load the external source.
1172 AbsentErr,
1173 Unneeded,
1174 }
1175
1176 impl ExternalSource {
get_source(&self) -> Option<&Lrc<String>>1177 pub fn get_source(&self) -> Option<&Lrc<String>> {
1178 match self {
1179 ExternalSource::Foreign { kind: ExternalSourceKind::Present(ref src), .. } => Some(src),
1180 _ => None,
1181 }
1182 }
1183 }
1184
/// Unit error type carrying no data.
// NOTE(review): judging by the name this reports an offset that overflowed;
// its producers are outside this part of the file - confirm there.
#[derive(Debug)]
pub struct OffsetOverflowError;
1187
1188 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Encodable, Decodable)]
1189 #[derive(HashStable_Generic)]
1190 pub enum SourceFileHashAlgorithm {
1191 Md5,
1192 Sha1,
1193 Sha256,
1194 }
1195
1196 impl FromStr for SourceFileHashAlgorithm {
1197 type Err = ();
1198
from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()>1199 fn from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()> {
1200 match s {
1201 "md5" => Ok(SourceFileHashAlgorithm::Md5),
1202 "sha1" => Ok(SourceFileHashAlgorithm::Sha1),
1203 "sha256" => Ok(SourceFileHashAlgorithm::Sha256),
1204 _ => Err(()),
1205 }
1206 }
1207 }
1208
1209 /// The hash of the on-disk source file used for debug info.
1210 #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
1211 #[derive(HashStable_Generic, Encodable, Decodable)]
1212 pub struct SourceFileHash {
1213 pub kind: SourceFileHashAlgorithm,
1214 value: [u8; 32],
1215 }
1216
1217 impl SourceFileHash {
new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash1218 pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash {
1219 let mut hash = SourceFileHash { kind, value: Default::default() };
1220 let len = hash.hash_len();
1221 let value = &mut hash.value[..len];
1222 let data = src.as_bytes();
1223 match kind {
1224 SourceFileHashAlgorithm::Md5 => {
1225 value.copy_from_slice(&Md5::digest(data));
1226 }
1227 SourceFileHashAlgorithm::Sha1 => {
1228 value.copy_from_slice(&Sha1::digest(data));
1229 }
1230 SourceFileHashAlgorithm::Sha256 => {
1231 value.copy_from_slice(&Sha256::digest(data));
1232 }
1233 }
1234 hash
1235 }
1236
1237 /// Check if the stored hash matches the hash of the string.
matches(&self, src: &str) -> bool1238 pub fn matches(&self, src: &str) -> bool {
1239 Self::new(self.kind, src) == *self
1240 }
1241
1242 /// The bytes of the hash.
hash_bytes(&self) -> &[u8]1243 pub fn hash_bytes(&self) -> &[u8] {
1244 let len = self.hash_len();
1245 &self.value[..len]
1246 }
1247
hash_len(&self) -> usize1248 fn hash_len(&self) -> usize {
1249 match self.kind {
1250 SourceFileHashAlgorithm::Md5 => 16,
1251 SourceFileHashAlgorithm::Sha1 => 20,
1252 SourceFileHashAlgorithm::Sha256 => 32,
1253 }
1254 }
1255 }
1256
1257 #[derive(Clone)]
1258 pub enum SourceFileLines {
1259 /// The source file lines, in decoded (random-access) form.
1260 Lines(Vec<BytePos>),
1261
1262 /// The source file lines, in undecoded difference list form.
1263 Diffs(SourceFileDiffs),
1264 }
1265
1266 impl SourceFileLines {
is_lines(&self) -> bool1267 pub fn is_lines(&self) -> bool {
1268 matches!(self, SourceFileLines::Lines(_))
1269 }
1270 }
1271
1272 /// The source file lines in difference list form. This matches the form
1273 /// used within metadata, which saves space by exploiting the fact that the
1274 /// lines list is sorted and individual lines are usually not that long.
1275 ///
1276 /// We read it directly from metadata and only decode it into `Lines` form
1277 /// when necessary. This is a significant performance win, especially for
1278 /// small crates where very little of `std`'s metadata is used.
1279 #[derive(Clone)]
1280 pub struct SourceFileDiffs {
1281 /// Position of the first line. Note that this is always encoded as a
1282 /// `BytePos` because it is often much larger than any of the
1283 /// differences.
1284 line_start: BytePos,
1285
1286 /// Always 1, 2, or 4. Always as small as possible, while being big
1287 /// enough to hold the length of the longest line in the source file.
1288 /// The 1 case is by far the most common.
1289 bytes_per_diff: usize,
1290
1291 /// The number of diffs encoded in `raw_diffs`. Always one less than
1292 /// the number of lines in the source file.
1293 num_diffs: usize,
1294
1295 /// The diffs in "raw" form. Each segment of `bytes_per_diff` length
1296 /// encodes one little-endian diff. Note that they aren't LEB128
1297 /// encoded. This makes for much faster decoding. Besides, the
1298 /// bytes_per_diff==1 case is by far the most common, and LEB128
1299 /// encoding has no effect on that case.
1300 raw_diffs: Vec<u8>,
1301 }
1302
1303 /// A single source in the [`SourceMap`].
1304 pub struct SourceFile {
1305 /// The name of the file that the source came from. Source that doesn't
1306 /// originate from files has names between angle brackets by convention
1307 /// (e.g., `<anon>`).
1308 pub name: FileName,
1309 /// The complete source code.
1310 pub src: Option<Lrc<String>>,
1311 /// The source code's hash.
1312 pub src_hash: SourceFileHash,
1313 /// The external source code (used for external crates, which will have a `None`
1314 /// value as `self.src`.
1315 pub external_src: Lock<ExternalSource>,
1316 /// The start position of this source in the `SourceMap`.
1317 pub start_pos: BytePos,
1318 /// The end position of this source in the `SourceMap`.
1319 pub end_pos: BytePos,
1320 /// Locations of lines beginnings in the source code.
1321 pub lines: Lock<SourceFileLines>,
1322 /// Locations of multi-byte characters in the source code.
1323 pub multibyte_chars: Vec<MultiByteChar>,
1324 /// Width of characters that are not narrow in the source code.
1325 pub non_narrow_chars: Vec<NonNarrowChar>,
1326 /// Locations of characters removed during normalization.
1327 pub normalized_pos: Vec<NormalizedPos>,
1328 /// A hash of the filename, used for speeding up hashing in incremental compilation.
1329 pub name_hash: Hash128,
1330 /// Indicates which crate this `SourceFile` was imported from.
1331 pub cnum: CrateNum,
1332 }
1333
1334 impl Clone for SourceFile {
clone(&self) -> Self1335 fn clone(&self) -> Self {
1336 Self {
1337 name: self.name.clone(),
1338 src: self.src.clone(),
1339 src_hash: self.src_hash,
1340 external_src: Lock::new(self.external_src.borrow().clone()),
1341 start_pos: self.start_pos,
1342 end_pos: self.end_pos,
1343 lines: Lock::new(self.lines.borrow().clone()),
1344 multibyte_chars: self.multibyte_chars.clone(),
1345 non_narrow_chars: self.non_narrow_chars.clone(),
1346 normalized_pos: self.normalized_pos.clone(),
1347 name_hash: self.name_hash,
1348 cnum: self.cnum,
1349 }
1350 }
1351 }
1352
1353 impl<S: Encoder> Encodable<S> for SourceFile {
encode(&self, s: &mut S)1354 fn encode(&self, s: &mut S) {
1355 self.name.encode(s);
1356 self.src_hash.encode(s);
1357 self.start_pos.encode(s);
1358 self.end_pos.encode(s);
1359
1360 // We are always in `Lines` form by the time we reach here.
1361 assert!(self.lines.borrow().is_lines());
1362 self.lines(|lines| {
1363 // Store the length.
1364 s.emit_u32(lines.len() as u32);
1365
1366 // Compute and store the difference list.
1367 if lines.len() != 0 {
1368 let max_line_length = if lines.len() == 1 {
1369 0
1370 } else {
1371 lines
1372 .array_windows()
1373 .map(|&[fst, snd]| snd - fst)
1374 .map(|bp| bp.to_usize())
1375 .max()
1376 .unwrap()
1377 };
1378
1379 let bytes_per_diff: usize = match max_line_length {
1380 0..=0xFF => 1,
1381 0x100..=0xFFFF => 2,
1382 _ => 4,
1383 };
1384
1385 // Encode the number of bytes used per diff.
1386 s.emit_u8(bytes_per_diff as u8);
1387
1388 // Encode the first element.
1389 lines[0].encode(s);
1390
1391 // Encode the difference list.
1392 let diff_iter = lines.array_windows().map(|&[fst, snd]| snd - fst);
1393 let num_diffs = lines.len() - 1;
1394 let mut raw_diffs;
1395 match bytes_per_diff {
1396 1 => {
1397 raw_diffs = Vec::with_capacity(num_diffs);
1398 for diff in diff_iter {
1399 raw_diffs.push(diff.0 as u8);
1400 }
1401 }
1402 2 => {
1403 raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
1404 for diff in diff_iter {
1405 raw_diffs.extend_from_slice(&(diff.0 as u16).to_le_bytes());
1406 }
1407 }
1408 4 => {
1409 raw_diffs = Vec::with_capacity(bytes_per_diff * num_diffs);
1410 for diff in diff_iter {
1411 raw_diffs.extend_from_slice(&(diff.0).to_le_bytes());
1412 }
1413 }
1414 _ => unreachable!(),
1415 }
1416 s.emit_raw_bytes(&raw_diffs);
1417 }
1418 });
1419
1420 self.multibyte_chars.encode(s);
1421 self.non_narrow_chars.encode(s);
1422 self.name_hash.encode(s);
1423 self.normalized_pos.encode(s);
1424 self.cnum.encode(s);
1425 }
1426 }
1427
1428 impl<D: Decoder> Decodable<D> for SourceFile {
decode(d: &mut D) -> SourceFile1429 fn decode(d: &mut D) -> SourceFile {
1430 let name: FileName = Decodable::decode(d);
1431 let src_hash: SourceFileHash = Decodable::decode(d);
1432 let start_pos: BytePos = Decodable::decode(d);
1433 let end_pos: BytePos = Decodable::decode(d);
1434 let lines = {
1435 let num_lines: u32 = Decodable::decode(d);
1436 if num_lines > 0 {
1437 // Read the number of bytes used per diff.
1438 let bytes_per_diff = d.read_u8() as usize;
1439
1440 // Read the first element.
1441 let line_start: BytePos = Decodable::decode(d);
1442
1443 // Read the difference list.
1444 let num_diffs = num_lines as usize - 1;
1445 let raw_diffs = d.read_raw_bytes(bytes_per_diff * num_diffs).to_vec();
1446 SourceFileLines::Diffs(SourceFileDiffs {
1447 line_start,
1448 bytes_per_diff,
1449 num_diffs,
1450 raw_diffs,
1451 })
1452 } else {
1453 SourceFileLines::Lines(vec![])
1454 }
1455 };
1456 let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d);
1457 let non_narrow_chars: Vec<NonNarrowChar> = Decodable::decode(d);
1458 let name_hash = Decodable::decode(d);
1459 let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d);
1460 let cnum: CrateNum = Decodable::decode(d);
1461 SourceFile {
1462 name,
1463 start_pos,
1464 end_pos,
1465 src: None,
1466 src_hash,
1467 // Unused - the metadata decoder will construct
1468 // a new SourceFile, filling in `external_src` properly
1469 external_src: Lock::new(ExternalSource::Unneeded),
1470 lines: Lock::new(lines),
1471 multibyte_chars,
1472 non_narrow_chars,
1473 normalized_pos,
1474 name_hash,
1475 cnum,
1476 }
1477 }
1478 }
1479
1480 impl fmt::Debug for SourceFile {
fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result1481 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
1482 write!(fmt, "SourceFile({:?})", self.name)
1483 }
1484 }
1485
1486 impl SourceFile {
new( name: FileName, mut src: String, start_pos: BytePos, hash_kind: SourceFileHashAlgorithm, ) -> Self1487 pub fn new(
1488 name: FileName,
1489 mut src: String,
1490 start_pos: BytePos,
1491 hash_kind: SourceFileHashAlgorithm,
1492 ) -> Self {
1493 // Compute the file hash before any normalization.
1494 let src_hash = SourceFileHash::new(hash_kind, &src);
1495 let normalized_pos = normalize_src(&mut src, start_pos);
1496
1497 let name_hash = {
1498 let mut hasher: StableHasher = StableHasher::new();
1499 name.hash(&mut hasher);
1500 hasher.finish()
1501 };
1502 let end_pos = start_pos.to_usize() + src.len();
1503 assert!(end_pos <= u32::MAX as usize);
1504
1505 let (lines, multibyte_chars, non_narrow_chars) =
1506 analyze_source_file::analyze_source_file(&src, start_pos);
1507
1508 SourceFile {
1509 name,
1510 src: Some(Lrc::new(src)),
1511 src_hash,
1512 external_src: Lock::new(ExternalSource::Unneeded),
1513 start_pos,
1514 end_pos: Pos::from_usize(end_pos),
1515 lines: Lock::new(SourceFileLines::Lines(lines)),
1516 multibyte_chars,
1517 non_narrow_chars,
1518 normalized_pos,
1519 name_hash,
1520 cnum: LOCAL_CRATE,
1521 }
1522 }
1523
lines<F, R>(&self, f: F) -> R where F: FnOnce(&[BytePos]) -> R,1524 pub fn lines<F, R>(&self, f: F) -> R
1525 where
1526 F: FnOnce(&[BytePos]) -> R,
1527 {
1528 let mut guard = self.lines.borrow_mut();
1529 match &*guard {
1530 SourceFileLines::Lines(lines) => f(lines),
1531 SourceFileLines::Diffs(SourceFileDiffs {
1532 mut line_start,
1533 bytes_per_diff,
1534 num_diffs,
1535 raw_diffs,
1536 }) => {
1537 // Convert from "diffs" form to "lines" form.
1538 let num_lines = num_diffs + 1;
1539 let mut lines = Vec::with_capacity(num_lines);
1540 lines.push(line_start);
1541
1542 assert_eq!(*num_diffs, raw_diffs.len() / bytes_per_diff);
1543 match bytes_per_diff {
1544 1 => {
1545 lines.extend(raw_diffs.into_iter().map(|&diff| {
1546 line_start = line_start + BytePos(diff as u32);
1547 line_start
1548 }));
1549 }
1550 2 => {
1551 lines.extend((0..*num_diffs).map(|i| {
1552 let pos = bytes_per_diff * i;
1553 let bytes = [raw_diffs[pos], raw_diffs[pos + 1]];
1554 let diff = u16::from_le_bytes(bytes);
1555 line_start = line_start + BytePos(diff as u32);
1556 line_start
1557 }));
1558 }
1559 4 => {
1560 lines.extend((0..*num_diffs).map(|i| {
1561 let pos = bytes_per_diff * i;
1562 let bytes = [
1563 raw_diffs[pos],
1564 raw_diffs[pos + 1],
1565 raw_diffs[pos + 2],
1566 raw_diffs[pos + 3],
1567 ];
1568 let diff = u32::from_le_bytes(bytes);
1569 line_start = line_start + BytePos(diff);
1570 line_start
1571 }));
1572 }
1573 _ => unreachable!(),
1574 }
1575 let res = f(&lines);
1576 *guard = SourceFileLines::Lines(lines);
1577 res
1578 }
1579 }
1580 }
1581
1582 /// Returns the `BytePos` of the beginning of the current line.
line_begin_pos(&self, pos: BytePos) -> BytePos1583 pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
1584 let line_index = self.lookup_line(pos).unwrap();
1585 self.lines(|lines| lines[line_index])
1586 }
1587
1588 /// Add externally loaded source.
1589 /// If the hash of the input doesn't match or no input is supplied via None,
1590 /// it is interpreted as an error and the corresponding enum variant is set.
1591 /// The return value signifies whether some kind of source is present.
add_external_src<F>(&self, get_src: F) -> bool where F: FnOnce() -> Option<String>,1592 pub fn add_external_src<F>(&self, get_src: F) -> bool
1593 where
1594 F: FnOnce() -> Option<String>,
1595 {
1596 if matches!(
1597 *self.external_src.borrow(),
1598 ExternalSource::Foreign { kind: ExternalSourceKind::AbsentOk, .. }
1599 ) {
1600 let src = get_src();
1601 let mut external_src = self.external_src.borrow_mut();
1602 // Check that no-one else have provided the source while we were getting it
1603 if let ExternalSource::Foreign {
1604 kind: src_kind @ ExternalSourceKind::AbsentOk, ..
1605 } = &mut *external_src
1606 {
1607 if let Some(mut src) = src {
1608 // The src_hash needs to be computed on the pre-normalized src.
1609 if self.src_hash.matches(&src) {
1610 normalize_src(&mut src, BytePos::from_usize(0));
1611 *src_kind = ExternalSourceKind::Present(Lrc::new(src));
1612 return true;
1613 }
1614 } else {
1615 *src_kind = ExternalSourceKind::AbsentErr;
1616 }
1617
1618 false
1619 } else {
1620 self.src.is_some() || external_src.get_source().is_some()
1621 }
1622 } else {
1623 self.src.is_some() || self.external_src.borrow().get_source().is_some()
1624 }
1625 }
1626
1627 /// Gets a line from the list of pre-computed line-beginnings.
1628 /// The line number here is 0-based.
get_line(&self, line_number: usize) -> Option<Cow<'_, str>>1629 pub fn get_line(&self, line_number: usize) -> Option<Cow<'_, str>> {
1630 fn get_until_newline(src: &str, begin: usize) -> &str {
1631 // We can't use `lines.get(line_number+1)` because we might
1632 // be parsing when we call this function and thus the current
1633 // line is the last one we have line info for.
1634 let slice = &src[begin..];
1635 match slice.find('\n') {
1636 Some(e) => &slice[..e],
1637 None => slice,
1638 }
1639 }
1640
1641 let begin = {
1642 let line = self.lines(|lines| lines.get(line_number).copied())?;
1643 let begin: BytePos = line - self.start_pos;
1644 begin.to_usize()
1645 };
1646
1647 if let Some(ref src) = self.src {
1648 Some(Cow::from(get_until_newline(src, begin)))
1649 } else {
1650 self.external_src
1651 .borrow()
1652 .get_source()
1653 .map(|src| Cow::Owned(String::from(get_until_newline(src, begin))))
1654 }
1655 }
1656
is_real_file(&self) -> bool1657 pub fn is_real_file(&self) -> bool {
1658 self.name.is_real()
1659 }
1660
1661 #[inline]
is_imported(&self) -> bool1662 pub fn is_imported(&self) -> bool {
1663 self.src.is_none()
1664 }
1665
count_lines(&self) -> usize1666 pub fn count_lines(&self) -> usize {
1667 self.lines(|lines| lines.len())
1668 }
1669
1670 /// Finds the line containing the given position. The return value is the
1671 /// index into the `lines` array of this `SourceFile`, not the 1-based line
1672 /// number. If the source_file is empty or the position is located before the
1673 /// first line, `None` is returned.
lookup_line(&self, pos: BytePos) -> Option<usize>1674 pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
1675 self.lines(|lines| lines.partition_point(|x| x <= &pos).checked_sub(1))
1676 }
1677
line_bounds(&self, line_index: usize) -> Range<BytePos>1678 pub fn line_bounds(&self, line_index: usize) -> Range<BytePos> {
1679 if self.is_empty() {
1680 return self.start_pos..self.end_pos;
1681 }
1682
1683 self.lines(|lines| {
1684 assert!(line_index < lines.len());
1685 if line_index == (lines.len() - 1) {
1686 lines[line_index]..self.end_pos
1687 } else {
1688 lines[line_index]..lines[line_index + 1]
1689 }
1690 })
1691 }
1692
1693 /// Returns whether or not the file contains the given `SourceMap` byte
1694 /// position. The position one past the end of the file is considered to be
1695 /// contained by the file. This implies that files for which `is_empty`
1696 /// returns true still contain one byte position according to this function.
1697 #[inline]
contains(&self, byte_pos: BytePos) -> bool1698 pub fn contains(&self, byte_pos: BytePos) -> bool {
1699 byte_pos >= self.start_pos && byte_pos <= self.end_pos
1700 }
1701
1702 #[inline]
is_empty(&self) -> bool1703 pub fn is_empty(&self) -> bool {
1704 self.start_pos == self.end_pos
1705 }
1706
1707 /// Calculates the original byte position relative to the start of the file
1708 /// based on the given byte position.
original_relative_byte_pos(&self, pos: BytePos) -> BytePos1709 pub fn original_relative_byte_pos(&self, pos: BytePos) -> BytePos {
1710 // Diff before any records is 0. Otherwise use the previously recorded
1711 // diff as that applies to the following characters until a new diff
1712 // is recorded.
1713 let diff = match self.normalized_pos.binary_search_by(|np| np.pos.cmp(&pos)) {
1714 Ok(i) => self.normalized_pos[i].diff,
1715 Err(i) if i == 0 => 0,
1716 Err(i) => self.normalized_pos[i - 1].diff,
1717 };
1718
1719 BytePos::from_u32(pos.0 - self.start_pos.0 + diff)
1720 }
1721
1722 /// Calculates a normalized byte position from a byte offset relative to the
1723 /// start of the file.
1724 ///
1725 /// When we get an inline assembler error from LLVM during codegen, we
1726 /// import the expanded assembly code as a new `SourceFile`, which can then
1727 /// be used for error reporting with spans. However the byte offsets given
1728 /// to us by LLVM are relative to the start of the original buffer, not the
1729 /// normalized one. Hence we need to convert those offsets to the normalized
1730 /// form when constructing spans.
normalized_byte_pos(&self, offset: u32) -> BytePos1731 pub fn normalized_byte_pos(&self, offset: u32) -> BytePos {
1732 let diff = match self
1733 .normalized_pos
1734 .binary_search_by(|np| (np.pos.0 + np.diff).cmp(&(self.start_pos.0 + offset)))
1735 {
1736 Ok(i) => self.normalized_pos[i].diff,
1737 Err(i) if i == 0 => 0,
1738 Err(i) => self.normalized_pos[i - 1].diff,
1739 };
1740
1741 BytePos::from_u32(self.start_pos.0 + offset - diff)
1742 }
1743
1744 /// Converts an absolute `BytePos` to a `CharPos` relative to the `SourceFile`.
bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos1745 pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
1746 // The number of extra bytes due to multibyte chars in the `SourceFile`.
1747 let mut total_extra_bytes = 0;
1748
1749 for mbc in self.multibyte_chars.iter() {
1750 debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
1751 if mbc.pos < bpos {
1752 // Every character is at least one byte, so we only
1753 // count the actual extra bytes.
1754 total_extra_bytes += mbc.bytes as u32 - 1;
1755 // We should never see a byte position in the middle of a
1756 // character.
1757 assert!(bpos.to_u32() >= mbc.pos.to_u32() + mbc.bytes as u32);
1758 } else {
1759 break;
1760 }
1761 }
1762
1763 assert!(self.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32());
1764 CharPos(bpos.to_usize() - self.start_pos.to_usize() - total_extra_bytes as usize)
1765 }
1766
1767 /// Looks up the file's (1-based) line number and (0-based `CharPos`) column offset, for a
1768 /// given `BytePos`.
lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos)1769 pub fn lookup_file_pos(&self, pos: BytePos) -> (usize, CharPos) {
1770 let chpos = self.bytepos_to_file_charpos(pos);
1771 match self.lookup_line(pos) {
1772 Some(a) => {
1773 let line = a + 1; // Line numbers start at 1
1774 let linebpos = self.lines(|lines| lines[a]);
1775 let linechpos = self.bytepos_to_file_charpos(linebpos);
1776 let col = chpos - linechpos;
1777 debug!("byte pos {:?} is on the line at byte pos {:?}", pos, linebpos);
1778 debug!("char pos {:?} is on the line at char pos {:?}", chpos, linechpos);
1779 debug!("byte is on line: {}", line);
1780 assert!(chpos >= linechpos);
1781 (line, col)
1782 }
1783 None => (0, chpos),
1784 }
1785 }
1786
1787 /// Looks up the file's (1-based) line number, (0-based `CharPos`) column offset, and (0-based)
1788 /// column offset when displayed, for a given `BytePos`.
lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize)1789 pub fn lookup_file_pos_with_col_display(&self, pos: BytePos) -> (usize, CharPos, usize) {
1790 let (line, col_or_chpos) = self.lookup_file_pos(pos);
1791 if line > 0 {
1792 let col = col_or_chpos;
1793 let linebpos = self.lines(|lines| lines[line - 1]);
1794 let col_display = {
1795 let start_width_idx = self
1796 .non_narrow_chars
1797 .binary_search_by_key(&linebpos, |x| x.pos())
1798 .unwrap_or_else(|x| x);
1799 let end_width_idx = self
1800 .non_narrow_chars
1801 .binary_search_by_key(&pos, |x| x.pos())
1802 .unwrap_or_else(|x| x);
1803 let special_chars = end_width_idx - start_width_idx;
1804 let non_narrow: usize = self.non_narrow_chars[start_width_idx..end_width_idx]
1805 .iter()
1806 .map(|x| x.width())
1807 .sum();
1808 col.0 - special_chars + non_narrow
1809 };
1810 (line, col, col_display)
1811 } else {
1812 let chpos = col_or_chpos;
1813 let col_display = {
1814 let end_width_idx = self
1815 .non_narrow_chars
1816 .binary_search_by_key(&pos, |x| x.pos())
1817 .unwrap_or_else(|x| x);
1818 let non_narrow: usize =
1819 self.non_narrow_chars[0..end_width_idx].iter().map(|x| x.width()).sum();
1820 chpos.0 - end_width_idx + non_narrow
1821 };
1822 (0, chpos, col_display)
1823 }
1824 }
1825 }
1826
1827 /// Normalizes the source code and records the normalizations.
normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos>1828 fn normalize_src(src: &mut String, start_pos: BytePos) -> Vec<NormalizedPos> {
1829 let mut normalized_pos = vec![];
1830 remove_bom(src, &mut normalized_pos);
1831 normalize_newlines(src, &mut normalized_pos);
1832
1833 // Offset all the positions by start_pos to match the final file positions.
1834 for np in &mut normalized_pos {
1835 np.pos.0 += start_pos.0;
1836 }
1837
1838 normalized_pos
1839 }
1840
1841 /// Removes UTF-8 BOM, if any.
remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>)1842 fn remove_bom(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
1843 if src.starts_with('\u{feff}') {
1844 src.drain(..3);
1845 normalized_pos.push(NormalizedPos { pos: BytePos(0), diff: 3 });
1846 }
1847 }
1848
1849 /// Replaces `\r\n` with `\n` in-place in `src`.
1850 ///
1851 /// Returns error if there's a lone `\r` in the string.
normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>)1852 fn normalize_newlines(src: &mut String, normalized_pos: &mut Vec<NormalizedPos>) {
1853 if !src.as_bytes().contains(&b'\r') {
1854 return;
1855 }
1856
1857 // We replace `\r\n` with `\n` in-place, which doesn't break utf-8 encoding.
1858 // While we *can* call `as_mut_vec` and do surgery on the live string
1859 // directly, let's rather steal the contents of `src`. This makes the code
1860 // safe even if a panic occurs.
1861
1862 let mut buf = std::mem::replace(src, String::new()).into_bytes();
1863 let mut gap_len = 0;
1864 let mut tail = buf.as_mut_slice();
1865 let mut cursor = 0;
1866 let original_gap = normalized_pos.last().map_or(0, |l| l.diff);
1867 loop {
1868 let idx = match find_crlf(&tail[gap_len..]) {
1869 None => tail.len(),
1870 Some(idx) => idx + gap_len,
1871 };
1872 tail.copy_within(gap_len..idx, 0);
1873 tail = &mut tail[idx - gap_len..];
1874 if tail.len() == gap_len {
1875 break;
1876 }
1877 cursor += idx - gap_len;
1878 gap_len += 1;
1879 normalized_pos.push(NormalizedPos {
1880 pos: BytePos::from_usize(cursor + 1),
1881 diff: original_gap + gap_len as u32,
1882 });
1883 }
1884
1885 // Account for removed `\r`.
1886 // After `set_len`, `buf` is guaranteed to contain utf-8 again.
1887 let new_len = buf.len() - gap_len;
1888 unsafe {
1889 buf.set_len(new_len);
1890 *src = String::from_utf8_unchecked(buf);
1891 }
1892
1893 fn find_crlf(src: &[u8]) -> Option<usize> {
1894 let mut search_idx = 0;
1895 while let Some(idx) = find_cr(&src[search_idx..]) {
1896 if src[search_idx..].get(idx + 1) != Some(&b'\n') {
1897 search_idx += idx + 1;
1898 continue;
1899 }
1900 return Some(search_idx + idx);
1901 }
1902 None
1903 }
1904
1905 fn find_cr(src: &[u8]) -> Option<usize> {
1906 src.iter().position(|&b| b == b'\r')
1907 }
1908 }
1909
1910 // _____________________________________________________________________________
1911 // Pos, BytePos, CharPos
1912 //
1913
/// Conversions between a position type and the plain integer types
/// `usize` and `u32`.
pub trait Pos {
    /// Builds a position from a `usize` value.
    fn from_usize(n: usize) -> Self;
    /// Returns the position as a `usize`.
    fn to_usize(&self) -> usize;
    /// Builds a position from a `u32` value.
    fn from_u32(n: u32) -> Self;
    /// Returns the position as a `u32`.
    fn to_u32(&self) -> u32;
}
1920
// Declares one or more single-field tuple structs and implements `Pos`,
// `Add` and `Sub` for each of them.
//
// The integer conversions are plain `as` casts to and from the inner type,
// and the arithmetic operators work directly on the inner values.
macro_rules! impl_pos {
    (
        $(
            $(#[$attr:meta])*
            $vis:vis struct $ident:ident($inner_vis:vis $inner_ty:ty);
        )*
    ) => {
        $(
            $(#[$attr])*
            $vis struct $ident($inner_vis $inner_ty);

            impl Pos for $ident {
                #[inline(always)]
                fn from_usize(n: usize) -> Self {
                    Self(n as $inner_ty)
                }

                #[inline(always)]
                fn to_usize(&self) -> usize {
                    self.0 as usize
                }

                #[inline(always)]
                fn from_u32(n: u32) -> Self {
                    Self(n as $inner_ty)
                }

                #[inline(always)]
                fn to_u32(&self) -> u32 {
                    self.0 as u32
                }
            }

            impl Add for $ident {
                type Output = Self;

                #[inline(always)]
                fn add(self, other: Self) -> Self {
                    Self(self.0 + other.0)
                }
            }

            impl Sub for $ident {
                type Output = Self;

                #[inline(always)]
                fn sub(self, other: Self) -> Self {
                    Self(self.0 - other.0)
                }
            }
        )*
    };
}
1974
// Defines the position newtypes `BytePos` and `CharPos`; the `impl_pos!`
// macro also generates their `Pos`, `Add` and `Sub` implementations.
impl_pos! {
    /// A byte offset.
    ///
    /// Keep this small (currently 32-bits), as AST contains a lot of them.
    #[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
    pub struct BytePos(pub u32);

    /// A character offset.
    ///
    /// Because of multibyte UTF-8 characters, a byte offset
    /// is not equivalent to a character offset. The [`SourceMap`] will convert [`BytePos`]
    /// values to `CharPos` values as necessary.
    #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
    pub struct CharPos(pub usize);
}
1990
1991 impl<S: Encoder> Encodable<S> for BytePos {
encode(&self, s: &mut S)1992 fn encode(&self, s: &mut S) {
1993 s.emit_u32(self.0);
1994 }
1995 }
1996
1997 impl<D: Decoder> Decodable<D> for BytePos {
decode(d: &mut D) -> BytePos1998 fn decode(d: &mut D) -> BytePos {
1999 BytePos(d.read_u32())
2000 }
2001 }
2002
2003 // _____________________________________________________________________________
2004 // Loc, SourceFileAndLine, SourceFileAndBytePos
2005 //
2006
/// A source code location used for error reporting.
///
/// Combines the source file with a line number and two column measures:
/// one counted in characters and one as displayed.
#[derive(Debug, Clone)]
pub struct Loc {
    /// Information about the original source.
    pub file: Lrc<SourceFile>,
    /// The (1-based) line number.
    pub line: usize,
    /// The (0-based) column offset.
    pub col: CharPos,
    /// The (0-based) column offset when displayed.
    pub col_display: usize,
}
2019
// Used to be structural records.
/// A source file paired with the index of one of its lines.
#[derive(Debug)]
pub struct SourceFileAndLine {
    /// The source file.
    pub sf: Lrc<SourceFile>,
    /// Index of line, starting from 0.
    pub line: usize,
}
/// A source file paired with a byte position.
#[derive(Debug)]
pub struct SourceFileAndBytePos {
    /// The source file.
    pub sf: Lrc<SourceFile>,
    /// The byte position.
    // NOTE(review): not visible here whether this is absolute or relative to
    // the start of `sf` — confirm against the code constructing this type.
    pub pos: BytePos,
}
2032
/// The portion of a single line that a span covers: the line's index plus the
/// columns where the span begins and ends on that line.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct LineInfo {
    /// Index of line, starting from 0.
    pub line_index: usize,

    /// Column in line where span begins, starting from 0.
    pub start_col: CharPos,

    /// Column in line where span ends, starting from 0, exclusive.
    pub end_col: CharPos,
}
2044
/// A source file together with a list of [`LineInfo`] entries.
// NOTE(review): presumably one entry per line touched by a span in `file` —
// confirm against the code that builds this type.
pub struct FileLines {
    /// The source file the line entries refer to.
    pub file: Lrc<SourceFile>,
    /// The per-line entries.
    pub lines: Vec<LineInfo>,
}
2049
/// Global hook taking a [`LocalDefId`]; it is initialized to a closure that
/// does nothing.
// NOTE(review): presumably replaced at startup by a real tracking callback —
// confirm against the users of `SPAN_TRACK`.
pub static SPAN_TRACK: AtomicRef<fn(LocalDefId)> = AtomicRef::new(&((|_| {}) as fn(_)));
2051
2052 // _____________________________________________________________________________
2053 // SpanLinesError, SpanSnippetError, DistinctSources, MalformedSourceMapPositions
2054 //
2055
/// Either the computed [`FileLines`] or the [`SpanLinesError`] explaining why
/// they could not be produced.
pub type FileLinesResult = Result<FileLines, SpanLinesError>;
2057
/// Error side of [`FileLinesResult`].
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum SpanLinesError {
    /// Carries the begin and end (file name, position) pairs involved.
    // NOTE(review): presumably raised when a span starts and ends in
    // different source files — confirm at the construction sites.
    DistinctSources(Box<DistinctSources>),
}
2062
/// Errors describing why a snippet could not be produced for a span.
// NOTE(review): the per-variant conditions below are inferred from names and
// payloads only — confirm at the construction sites.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum SpanSnippetError {
    /// Carries the offending span.
    IllFormedSpan(Span),
    /// Carries the begin and end (file name, position) pairs involved.
    DistinctSources(Box<DistinctSources>),
    /// Carries the file name, source length and begin/end positions involved.
    MalformedForSourcemap(MalformedSourceMapPositions),
    /// Carries the name of the file concerned.
    SourceNotAvailable { filename: FileName },
}
2070
/// Payload of the `DistinctSources` error variants: a begin and an end
/// location, each given as a file name plus a byte position.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DistinctSources {
    /// File name and position of the beginning.
    pub begin: (FileName, BytePos),
    /// File name and position of the end.
    pub end: (FileName, BytePos),
}
2076
/// Payload of [`SpanSnippetError::MalformedForSourcemap`]: a file name, a
/// source length and a begin/end pair of positions.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct MalformedSourceMapPositions {
    /// Name of the file concerned.
    pub name: FileName,
    /// Length of the source.
    // NOTE(review): presumably in bytes — confirm at the construction sites.
    pub source_len: usize,
    /// The begin position.
    pub begin_pos: BytePos,
    /// The end position.
    pub end_pos: BytePos,
}
2084
/// A `start..end` range located inside a `Span`, for diagnostics that only
/// know positions relative to that span.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct InnerSpan {
    /// Start of the range.
    pub start: usize,
    /// End of the range.
    pub end: usize,
}

impl InnerSpan {
    /// Creates an `InnerSpan` from its two bounds, stored as given.
    pub fn new(start: usize, end: usize) -> InnerSpan {
        Self { start, end }
    }
}
2097
/// Requirements for a `StableHashingContext` to be used in this crate.
///
/// This is a hack to allow using the [`HashStable_Generic`] derive macro
/// instead of implementing everything in rustc_middle.
pub trait HashStableContext {
    /// Returns the [`DefPathHash`] for `def_id`.
    fn def_path_hash(&self, def_id: DefId) -> DefPathHash;
    /// Whether spans take part in hashing. When this is `false`, hashing a
    /// `Span` returns without feeding anything to the hasher.
    fn hash_spans(&self) -> bool;
    /// Accesses `sess.opts.unstable_opts.incremental_ignore_spans` since
    /// we don't have easy access to a `Session`
    fn unstable_opts_incremental_ignore_spans(&self) -> bool;
    /// Returns the span of the definition `def_id`. Span hashing uses it as
    /// the base for spans hashed relative to their parent definition.
    fn def_span(&self, def_id: LocalDefId) -> Span;
    /// Resolves `span` to `(file, line_lo, col_lo, line_hi, col_hi)`, or
    /// returns `None` when that is not possible.
    fn span_data_to_lines_and_cols(
        &mut self,
        span: &SpanData,
    ) -> Option<(Lrc<SourceFile>, usize, BytePos, usize, BytePos)>;
    /// Returns the [`HashingControls`] of this context.
    fn hashing_controls(&self) -> HashingControls;
}
2115
2116 impl<CTX> HashStable<CTX> for Span
2117 where
2118 CTX: HashStableContext,
2119 {
2120 /// Hashes a span in a stable way. We can't directly hash the span's `BytePos`
2121 /// fields (that would be similar to hashing pointers, since those are just
2122 /// offsets into the `SourceMap`). Instead, we hash the (file name, line, column)
2123 /// triple, which stays the same even if the containing `SourceFile` has moved
2124 /// within the `SourceMap`.
2125 ///
2126 /// Also note that we are hashing byte offsets for the column, not unicode
2127 /// codepoint offsets. For the purpose of the hash that's sufficient.
2128 /// Also, hashing filenames is expensive so we avoid doing it twice when the
2129 /// span starts and ends in the same file, which is almost always the case.
hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher)2130 fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
2131 const TAG_VALID_SPAN: u8 = 0;
2132 const TAG_INVALID_SPAN: u8 = 1;
2133 const TAG_RELATIVE_SPAN: u8 = 2;
2134
2135 if !ctx.hash_spans() {
2136 return;
2137 }
2138
2139 let span = self.data_untracked();
2140 span.ctxt.hash_stable(ctx, hasher);
2141 span.parent.hash_stable(ctx, hasher);
2142
2143 if span.is_dummy() {
2144 Hash::hash(&TAG_INVALID_SPAN, hasher);
2145 return;
2146 }
2147
2148 if let Some(parent) = span.parent {
2149 let def_span = ctx.def_span(parent).data_untracked();
2150 if def_span.contains(span) {
2151 // This span is enclosed in a definition: only hash the relative position.
2152 Hash::hash(&TAG_RELATIVE_SPAN, hasher);
2153 (span.lo - def_span.lo).to_u32().hash_stable(ctx, hasher);
2154 (span.hi - def_span.lo).to_u32().hash_stable(ctx, hasher);
2155 return;
2156 }
2157 }
2158
2159 // If this is not an empty or invalid span, we want to hash the last
2160 // position that belongs to it, as opposed to hashing the first
2161 // position past it.
2162 let Some((file, line_lo, col_lo, line_hi, col_hi)) = ctx.span_data_to_lines_and_cols(&span) else {
2163 Hash::hash(&TAG_INVALID_SPAN, hasher);
2164 return;
2165 };
2166
2167 Hash::hash(&TAG_VALID_SPAN, hasher);
2168 Hash::hash(&file.name_hash, hasher);
2169
2170 // Hash both the length and the end location (line/column) of a span. If we
2171 // hash only the length, for example, then two otherwise equal spans with
2172 // different end locations will have the same hash. This can cause a problem
2173 // during incremental compilation wherein a previous result for a query that
2174 // depends on the end location of a span will be incorrectly reused when the
2175 // end location of the span it depends on has changed (see issue #74890). A
2176 // similar analysis applies if some query depends specifically on the length
2177 // of the span, but we only hash the end location. So hash both.
2178
2179 let col_lo_trunc = (col_lo.0 as u64) & 0xFF;
2180 let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8;
2181 let col_hi_trunc = (col_hi.0 as u64) & 0xFF << 32;
2182 let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40;
2183 let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc;
2184 let len = (span.hi - span.lo).0;
2185 Hash::hash(&col_line, hasher);
2186 Hash::hash(&len, hasher);
2187 }
2188 }
2189
2190 /// Useful type to use with `Result<>` indicate that an error has already
2191 /// been reported to the user, so no need to continue checking.
2192 #[derive(Clone, Copy, Debug, Encodable, Decodable, Hash, PartialEq, Eq, PartialOrd, Ord)]
2193 #[derive(HashStable_Generic)]
2194 pub struct ErrorGuaranteed(());
2195
2196 impl ErrorGuaranteed {
2197 /// To be used only if you really know what you are doing... ideally, we would find a way to
2198 /// eliminate all calls to this method.
2199 #[deprecated = "`Session::delay_span_bug` should be preferred over this function"]
unchecked_claim_error_was_emitted() -> Self2200 pub fn unchecked_claim_error_was_emitted() -> Self {
2201 ErrorGuaranteed(())
2202 }
2203 }
2204