• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Basic syntax highlighting functionality.
2 //!
//! This module uses the `rustc_lexer` crate to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
5 //!
//! Use `render_example_with_highlighting` or `render_item_decl_with_highlighting` to highlight
//! some Rust code.
7 
8 use crate::clean::PrimitiveType;
9 use crate::html::escape::Escape;
10 use crate::html::render::{Context, LinkFromSrc};
11 
12 use std::collections::VecDeque;
13 use std::fmt::{Display, Write};
14 
15 use rustc_data_structures::fx::FxHashMap;
16 use rustc_lexer::{Cursor, LiteralKind, TokenKind};
17 use rustc_span::edition::Edition;
18 use rustc_span::symbol::Symbol;
19 use rustc_span::{BytePos, Span, DUMMY_SP};
20 
21 use super::format::{self, Buffer};
22 
/// This type is needed in case we want to render links on items to allow to go to their definition.
///
/// It is handed to `write_code` as an `Option<HrefContext>`: when that option is `None`, the code
/// won't try to generate links to item definitions.
pub(crate) struct HrefContext<'a, 'tcx> {
    /// The HTML rendering `Context` (imported from `crate::html::render`).
    pub(crate) context: &'a Context<'tcx>,
    /// This span contains the current file we're going through.
    pub(crate) file_span: Span,
    /// This field is used to know "how far" from the top of the directory we are to link to either
    /// documentation pages or other source pages.
    pub(crate) root_path: &'a str,
    /// This field is used to calculate precise local URLs.
    pub(crate) current_href: String,
}
34 
/// Decorations are represented as a map from CSS class to vector of `(lo, hi)` ranges.
/// Each range will be wrapped in a span with that class.
///
/// The bounds of the ranges are compared against the classifier's byte position in the source
/// text, so they are byte offsets rather than character counts.
#[derive(Default)]
pub(crate) struct DecorationInfo(pub(crate) FxHashMap<&'static str, Vec<(u32, u32)>>);
39 
/// Kind of tooltip attached to a rendered code example.
///
/// `write_header` uses it to pick both an extra CSS class for the wrapping `<div>` and the text
/// stored in the `title` attribute of the tooltip link.
#[derive(Eq, PartialEq, Clone, Copy)]
pub(crate) enum Tooltip {
    /// Tooltip text: "This example is not tested".
    Ignore,
    /// Tooltip text: "This example deliberately fails to compile".
    CompileFail,
    /// Tooltip text: "This example panics".
    ShouldPanic,
    /// Tooltip text: "This example runs with edition" followed by the given edition.
    Edition(Edition),
    /// No tooltip link and no extra CSS class are emitted.
    None,
}
48 
/// Highlights `src` as an inline example, writing the HTML output into `out`.
///
/// The output is made of a header (wrapping `<div>`, optional tooltip and the opening
/// `<pre><code>` tags), the highlighted code, and a footer in which `playground_button` (if any)
/// is inserted as is.
pub(crate) fn render_example_with_highlighting(
    src: &str,
    out: &mut Buffer,
    tooltip: Tooltip,
    playground_button: Option<&str>,
) {
    write_header(out, "rust-example-rendered", None, tooltip);
    // No `HrefContext` and no decorations: examples get neither links nor custom spans.
    write_code(out, src, None, None);
    write_footer(out, playground_button);
}
60 
61 /// Highlights `src` as an item-decl, returning the HTML output.
render_item_decl_with_highlighting(src: &str, out: &mut Buffer)62 pub(crate) fn render_item_decl_with_highlighting(src: &str, out: &mut Buffer) {
63     write!(out, "<pre class=\"rust item-decl\">");
64     write_code(out, src, None, None);
65     write!(out, "</pre>");
66 }
67 
write_header(out: &mut Buffer, class: &str, extra_content: Option<Buffer>, tooltip: Tooltip)68 fn write_header(out: &mut Buffer, class: &str, extra_content: Option<Buffer>, tooltip: Tooltip) {
69     write!(
70         out,
71         "<div class=\"example-wrap{}\">",
72         match tooltip {
73             Tooltip::Ignore => " ignore",
74             Tooltip::CompileFail => " compile_fail",
75             Tooltip::ShouldPanic => " should_panic",
76             Tooltip::Edition(_) => " edition",
77             Tooltip::None => "",
78         },
79     );
80 
81     if tooltip != Tooltip::None {
82         let edition_code;
83         write!(
84             out,
85             "<a href=\"#\" class=\"tooltip\" title=\"{}\">ⓘ</a>",
86             match tooltip {
87                 Tooltip::Ignore => "This example is not tested",
88                 Tooltip::CompileFail => "This example deliberately fails to compile",
89                 Tooltip::ShouldPanic => "This example panics",
90                 Tooltip::Edition(edition) => {
91                     edition_code = format!("This example runs with edition {edition}");
92                     &edition_code
93                 }
94                 Tooltip::None => unreachable!(),
95             },
96         );
97     }
98 
99     if let Some(extra) = extra_content {
100         out.push_buffer(extra);
101     }
102     if class.is_empty() {
103         write!(out, "<pre class=\"rust\">");
104     } else {
105         write!(out, "<pre class=\"rust {class}\">");
106     }
107     write!(out, "<code>");
108 }
109 
110 /// Check if two `Class` can be merged together. In the following rules, "unclassified" means `None`
111 /// basically (since it's `Option<Class>`). The following rules apply:
112 ///
113 /// * If two `Class` have the same variant, then they can be merged.
114 /// * If the other `Class` is unclassified and only contains white characters (backline,
115 ///   whitespace, etc), it can be merged.
116 /// * `Class::Ident` is considered the same as unclassified (because it doesn't have an associated
117 ///    CSS class).
can_merge(class1: Option<Class>, class2: Option<Class>, text: &str) -> bool118 fn can_merge(class1: Option<Class>, class2: Option<Class>, text: &str) -> bool {
119     match (class1, class2) {
120         (Some(c1), Some(c2)) => c1.is_equal_to(c2),
121         (Some(Class::Ident(_)), None) | (None, Some(Class::Ident(_))) => true,
122         (Some(_), None) | (None, Some(_)) => text.trim().is_empty(),
123         (None, None) => true,
124     }
125 }
126 
/// This type is used as a convenience to prevent having to pass all its fields as arguments into
/// the various functions (which became its methods).
///
/// Its `Drop` implementation flushes the content which is still pending.
struct TokenHandler<'a, 'tcx, F: Write> {
    /// Where the generated HTML is written.
    out: &'a mut F,
    /// It contains the closing tag and the associated `Class`.
    closing_tags: Vec<(&'static str, Class)>,
    /// This is used because we don't automatically generate the closing tag on `ExitSpan` in
    /// case an `EnterSpan` event with the same class follows.
    pending_exit_span: Option<Class>,
    /// `current_class` and `pending_elems` are used to group HTML elements with same `class`
    /// attributes to reduce the DOM size.
    current_class: Option<Class>,
    /// We need to keep the `Class` for each element because it could contain a `Span` which is
    /// used to generate links.
    pending_elems: Vec<(&'a str, Option<Class>)>,
    /// Forwarded to the functions writing the HTML elements (`string` and `enter_span`).
    href_context: Option<HrefContext<'a, 'tcx>>,
}
144 
145 impl<'a, 'tcx, F: Write> TokenHandler<'a, 'tcx, F> {
handle_exit_span(&mut self)146     fn handle_exit_span(&mut self) {
147         // We can't get the last `closing_tags` element using `pop()` because `closing_tags` is
148         // being used in `write_pending_elems`.
149         let class = self.closing_tags.last().expect("ExitSpan without EnterSpan").1;
150         // We flush everything just in case...
151         self.write_pending_elems(Some(class));
152 
153         exit_span(self.out, self.closing_tags.pop().expect("ExitSpan without EnterSpan").0);
154         self.pending_exit_span = None;
155     }
156 
157     /// Write all the pending elements sharing a same (or at mergeable) `Class`.
158     ///
159     /// If there is a "parent" (if a `EnterSpan` event was encountered) and the parent can be merged
160     /// with the elements' class, then we simply write the elements since the `ExitSpan` event will
161     /// close the tag.
162     ///
163     /// Otherwise, if there is only one pending element, we let the `string` function handle both
164     /// opening and closing the tag, otherwise we do it into this function.
165     ///
166     /// It returns `true` if `current_class` must be set to `None` afterwards.
write_pending_elems(&mut self, current_class: Option<Class>) -> bool167     fn write_pending_elems(&mut self, current_class: Option<Class>) -> bool {
168         if self.pending_elems.is_empty() {
169             return false;
170         }
171         if let Some((_, parent_class)) = self.closing_tags.last() &&
172             can_merge(current_class, Some(*parent_class), "")
173         {
174             for (text, class) in self.pending_elems.iter() {
175                 string(self.out, Escape(text), *class, &self.href_context, false);
176             }
177         } else {
178             // We only want to "open" the tag ourselves if we have more than one pending and if the
179             // current parent tag is not the same as our pending content.
180             let close_tag = if self.pending_elems.len() > 1 && let Some(current_class) = current_class {
181                 Some(enter_span(self.out, current_class, &self.href_context))
182             } else {
183                 None
184             };
185             for (text, class) in self.pending_elems.iter() {
186                 string(self.out, Escape(text), *class, &self.href_context, close_tag.is_none());
187             }
188             if let Some(close_tag) = close_tag {
189                 exit_span(self.out, close_tag);
190             }
191         }
192         self.pending_elems.clear();
193         true
194     }
195 }
196 
197 impl<'a, 'tcx, F: Write> Drop for TokenHandler<'a, 'tcx, F> {
198     /// When leaving, we need to flush all pending data to not have missing content.
drop(&mut self)199     fn drop(&mut self) {
200         if self.pending_exit_span.is_some() {
201             self.handle_exit_span();
202         } else {
203             self.write_pending_elems(self.current_class);
204         }
205     }
206 }
207 
/// Convert the given `src` source code into HTML by adding classes for highlighting.
///
/// This code is used to render code blocks (in the documentation) as well as the source code pages.
///
/// Some explanations on the last arguments:
///
/// In case we are rendering a code block and not a source code file, `href_context` will be `None`.
/// To put it more simply: if `href_context` is `None`, the code won't try to generate links to an
/// item definition.
///
/// `decoration_info`, if any, describes extra spans (identified by a CSS class) which are wrapped
/// around ranges of the source code.
///
/// NOTE(review): the original comment ended mid-sentence here, announcing more explanations about
/// spans and how they are used without saying where they are provided.
pub(super) fn write_code(
    out: &mut impl Write,
    src: &str,
    href_context: Option<HrefContext<'_, '_>>,
    decoration_info: Option<DecorationInfo>,
) {
    // This replace allows to fix how the code source with DOS backline characters is displayed.
    let src = src.replace("\r\n", "\n");
    // The content which is still pending when this function returns is written by the `Drop`
    // implementation of `TokenHandler`.
    let mut token_handler = TokenHandler {
        out,
        closing_tags: Vec::new(),
        pending_exit_span: None,
        current_class: None,
        pending_elems: Vec::new(),
        href_context,
    };

    Classifier::new(
        &src,
        // Without `href_context`, there is no file span to provide.
        token_handler.href_context.as_ref().map(|c| c.file_span).unwrap_or(DUMMY_SP),
        decoration_info,
    )
    .highlight(&mut |highlight| {
        match highlight {
            Highlight::Token { text, class } => {
                // If we received a `ExitSpan` event and then have a non-compatible `Class`, we
                // need to close the `<span>`.
                let need_current_class_update = if let Some(pending) = token_handler.pending_exit_span &&
                    !can_merge(Some(pending), class, text) {
                        token_handler.handle_exit_span();
                        true
                // If the two `Class` are different, time to flush the current content and start
                // a new one.
                } else if !can_merge(token_handler.current_class, class, text) {
                    token_handler.write_pending_elems(token_handler.current_class);
                    true
                } else {
                    // The token can be merged: the class is only updated if none is set yet.
                    token_handler.current_class.is_none()
                };

                if need_current_class_update {
                    // The span (if any) is replaced with `DUMMY_SP` in the stored class.
                    token_handler.current_class = class.map(Class::dummy);
                }
                // The token is only queued here, it is written when the queue is flushed.
                token_handler.pending_elems.push((text, class));
            }
            Highlight::EnterSpan { class } => {
                let mut should_add = true;
                if let Some(pending_exit_span) = token_handler.pending_exit_span {
                    if class.is_equal_to(pending_exit_span) {
                        // The span which was about to be closed has the same class as the new
                        // one: it is kept open instead of opening a new one.
                        should_add = false;
                    } else {
                        token_handler.handle_exit_span();
                    }
                } else {
                    // We flush everything just in case...
                    if token_handler.write_pending_elems(token_handler.current_class) {
                        token_handler.current_class = None;
                    }
                }
                if should_add {
                    let closing_tag = enter_span(token_handler.out, class, &token_handler.href_context);
                    token_handler.closing_tags.push((closing_tag, class));
                }

                token_handler.current_class = None;
                token_handler.pending_exit_span = None;
            }
            Highlight::ExitSpan => {
                token_handler.current_class = None;
                // The closing tag isn't written right away in case an `EnterSpan` event with the
                // same class follows.
                token_handler.pending_exit_span =
                    Some(token_handler.closing_tags.last().as_ref().expect("ExitSpan without EnterSpan").1);
            }
        };
    });
}
294 
write_footer(out: &mut Buffer, playground_button: Option<&str>)295 fn write_footer(out: &mut Buffer, playground_button: Option<&str>) {
296     writeln!(out, "</code></pre>{}</div>", playground_button.unwrap_or_default());
297 }
298 
/// How a span of text is classified. Mostly corresponds to token kinds.
///
/// The CSS class associated with each variant is returned by `Class::as_html`. The `Span` stored
/// in some variants is the one returned by `Class::get_span`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Class {
    Comment,
    DocComment,
    Attribute,
    KeyWord,
    /// Keywords that do pointer/reference stuff.
    RefKeyWord,
    Self_(Span),
    Macro(Span),
    MacroNonTerminal,
    String,
    Number,
    Bool,
    /// `Ident` isn't rendered in the HTML but we still need it for the `Span` it contains.
    Ident(Span),
    Lifetime,
    PreludeTy,
    PreludeVal,
    QuestionMark,
    /// Custom span, the string being used as its CSS class.
    Decoration(&'static str),
}
322 
323 impl Class {
324     /// It is only looking at the variant, not the variant content.
325     ///
326     /// It is used mostly to group multiple similar HTML elements into one `<span>` instead of
327     /// multiple ones.
is_equal_to(self, other: Self) -> bool328     fn is_equal_to(self, other: Self) -> bool {
329         match (self, other) {
330             (Self::Self_(_), Self::Self_(_))
331             | (Self::Macro(_), Self::Macro(_))
332             | (Self::Ident(_), Self::Ident(_)) => true,
333             (Self::Decoration(c1), Self::Decoration(c2)) => c1 == c2,
334             (x, y) => x == y,
335         }
336     }
337 
338     /// If `self` contains a `Span`, it'll be replaced with `DUMMY_SP` to prevent creating links
339     /// on "empty content" (because of the attributes merge).
dummy(self) -> Self340     fn dummy(self) -> Self {
341         match self {
342             Self::Self_(_) => Self::Self_(DUMMY_SP),
343             Self::Macro(_) => Self::Macro(DUMMY_SP),
344             Self::Ident(_) => Self::Ident(DUMMY_SP),
345             s => s,
346         }
347     }
348 
349     /// Returns the css class expected by rustdoc for each `Class`.
as_html(self) -> &'static str350     fn as_html(self) -> &'static str {
351         match self {
352             Class::Comment => "comment",
353             Class::DocComment => "doccomment",
354             Class::Attribute => "attr",
355             Class::KeyWord => "kw",
356             Class::RefKeyWord => "kw-2",
357             Class::Self_(_) => "self",
358             Class::Macro(_) => "macro",
359             Class::MacroNonTerminal => "macro-nonterminal",
360             Class::String => "string",
361             Class::Number => "number",
362             Class::Bool => "bool-val",
363             Class::Ident(_) => "",
364             Class::Lifetime => "lifetime",
365             Class::PreludeTy => "prelude-ty",
366             Class::PreludeVal => "prelude-val",
367             Class::QuestionMark => "question-mark",
368             Class::Decoration(kind) => kind,
369         }
370     }
371 
372     /// In case this is an item which can be converted into a link to a definition, it'll contain
373     /// a "span" (a tuple representing `(lo, hi)` equivalent of `Span`).
get_span(self) -> Option<Span>374     fn get_span(self) -> Option<Span> {
375         match self {
376             Self::Ident(sp) | Self::Self_(sp) | Self::Macro(sp) => Some(sp),
377             Self::Comment
378             | Self::DocComment
379             | Self::Attribute
380             | Self::KeyWord
381             | Self::RefKeyWord
382             | Self::MacroNonTerminal
383             | Self::String
384             | Self::Number
385             | Self::Bool
386             | Self::Lifetime
387             | Self::PreludeTy
388             | Self::PreludeVal
389             | Self::QuestionMark
390             | Self::Decoration(_) => None,
391         }
392     }
393 }
394 
/// Events sent by `Classifier::highlight` to its `sink`.
enum Highlight<'a> {
    /// A piece of the source code with its (optional) class.
    Token { text: &'a str, class: Option<Class> },
    /// All the following content is part of a span of the given class, until the matching
    /// `ExitSpan`.
    EnterSpan { class: Class },
    /// End of the most recently entered span.
    ExitSpan,
}
400 
401 struct TokenIter<'a> {
402     src: &'a str,
403     cursor: Cursor<'a>,
404 }
405 
406 impl<'a> Iterator for TokenIter<'a> {
407     type Item = (TokenKind, &'a str);
next(&mut self) -> Option<(TokenKind, &'a str)>408     fn next(&mut self) -> Option<(TokenKind, &'a str)> {
409         let token = self.cursor.advance_token();
410         if token.kind == TokenKind::Eof {
411             return None;
412         }
413         let (text, rest) = self.src.split_at(token.len as usize);
414         self.src = rest;
415         Some((token.kind, text))
416     }
417 }
418 
419 /// Classifies into identifier class; returns `None` if this is a non-keyword identifier.
get_real_ident_class(text: &str, allow_path_keywords: bool) -> Option<Class>420 fn get_real_ident_class(text: &str, allow_path_keywords: bool) -> Option<Class> {
421     let ignore: &[&str] =
422         if allow_path_keywords { &["self", "Self", "super", "crate"] } else { &["self", "Self"] };
423     if ignore.iter().any(|k| *k == text) {
424         return None;
425     }
426     Some(match text {
427         "ref" | "mut" => Class::RefKeyWord,
428         "false" | "true" => Class::Bool,
429         _ if Symbol::intern(text).is_reserved(|| Edition::Edition2021) => Class::KeyWord,
430         _ => return None,
431     })
432 }
433 
434 /// This iterator comes from the same idea than "Peekable" except that it allows to "peek" more than
435 /// just the next item by using `peek_next`. The `peek` method always returns the next item after
436 /// the current one whereas `peek_next` will return the next item after the last one peeked.
437 ///
438 /// You can use both `peek` and `peek_next` at the same time without problem.
439 struct PeekIter<'a> {
440     stored: VecDeque<(TokenKind, &'a str)>,
441     /// This position is reinitialized when using `next`. It is used in `peek_next`.
442     peek_pos: usize,
443     iter: TokenIter<'a>,
444 }
445 
446 impl<'a> PeekIter<'a> {
new(iter: TokenIter<'a>) -> Self447     fn new(iter: TokenIter<'a>) -> Self {
448         Self { stored: VecDeque::new(), peek_pos: 0, iter }
449     }
450     /// Returns the next item after the current one. It doesn't interfere with `peek_next` output.
peek(&mut self) -> Option<&(TokenKind, &'a str)>451     fn peek(&mut self) -> Option<&(TokenKind, &'a str)> {
452         if self.stored.is_empty() && let Some(next) = self.iter.next() {
453             self.stored.push_back(next);
454         }
455         self.stored.front()
456     }
457     /// Returns the next item after the last one peeked. It doesn't interfere with `peek` output.
peek_next(&mut self) -> Option<&(TokenKind, &'a str)>458     fn peek_next(&mut self) -> Option<&(TokenKind, &'a str)> {
459         self.peek_pos += 1;
460         if self.peek_pos - 1 < self.stored.len() {
461             self.stored.get(self.peek_pos - 1)
462         } else if let Some(next) = self.iter.next() {
463             self.stored.push_back(next);
464             self.stored.back()
465         } else {
466             None
467         }
468     }
469 }
470 
471 impl<'a> Iterator for PeekIter<'a> {
472     type Item = (TokenKind, &'a str);
next(&mut self) -> Option<Self::Item>473     fn next(&mut self) -> Option<Self::Item> {
474         self.peek_pos = 0;
475         if let Some(first) = self.stored.pop_front() { Some(first) } else { self.iter.next() }
476     }
477 }
478 
479 /// Custom spans inserted into the source. Eg --scrape-examples uses this to highlight function calls
480 struct Decorations {
481     starts: Vec<(u32, &'static str)>,
482     ends: Vec<u32>,
483 }
484 
485 impl Decorations {
new(info: DecorationInfo) -> Self486     fn new(info: DecorationInfo) -> Self {
487         // Extract tuples (start, end, kind) into separate sequences of (start, kind) and (end).
488         let (mut starts, mut ends): (Vec<_>, Vec<_>) = info
489             .0
490             .into_iter()
491             .flat_map(|(kind, ranges)| ranges.into_iter().map(move |(lo, hi)| ((lo, kind), hi)))
492             .unzip();
493 
494         // Sort the sequences in document order.
495         starts.sort_by_key(|(lo, _)| *lo);
496         ends.sort();
497 
498         Decorations { starts, ends }
499     }
500 }
501 
/// Processes program tokens, classifying strings of text by highlighting
/// category (`Class`).
struct Classifier<'src> {
    /// Tokens of `src`, with lookahead support.
    tokens: PeekIter<'src>,
    // NOTE(review): the three flags below are handled in `advance`; presumably they track the
    // construct currently being highlighted -- confirm against the full method.
    in_attribute: bool,
    in_macro: bool,
    in_macro_nonterminal: bool,
    /// Position (in bytes) in `src`, increased by the length of the text of the handled tokens.
    byte_pos: u32,
    /// Span of the file, used by `new_span` as the base of the spans it creates.
    file_span: Span,
    /// The source code being highlighted.
    src: &'src str,
    /// Custom spans to emit (as `EnterSpan`/`ExitSpan` events) while highlighting, if any.
    decorations: Option<Decorations>,
}
514 
515 impl<'src> Classifier<'src> {
516     /// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
517     /// file span which will be used later on by the `span_correspondence_map`.
new(src: &str, file_span: Span, decoration_info: Option<DecorationInfo>) -> Classifier<'_>518     fn new(src: &str, file_span: Span, decoration_info: Option<DecorationInfo>) -> Classifier<'_> {
519         let tokens = PeekIter::new(TokenIter { src, cursor: Cursor::new(src) });
520         let decorations = decoration_info.map(Decorations::new);
521         Classifier {
522             tokens,
523             in_attribute: false,
524             in_macro: false,
525             in_macro_nonterminal: false,
526             byte_pos: 0,
527             file_span,
528             src,
529             decorations,
530         }
531     }
532 
533     /// Convenient wrapper to create a [`Span`] from a position in the file.
new_span(&self, lo: u32, text: &str) -> Span534     fn new_span(&self, lo: u32, text: &str) -> Span {
535         let hi = lo + text.len() as u32;
536         let file_lo = self.file_span.lo();
537         self.file_span.with_lo(file_lo + BytePos(lo)).with_hi(file_lo + BytePos(hi))
538     }
539 
    /// Concatenate colons and idents as one when possible.
    ///
    /// The tokens which are part of the path are consumed from `self.tokens` but `self.byte_pos`
    /// is left untouched. Each returned item is made of a token kind and of the start and end
    /// positions (in bytes, in `self.src`) of the text it covers:
    ///
    /// * the whole path, if any, is returned as one `TokenKind::Ident` item,
    /// * the colons which couldn't be made part of the path, if any, are returned as one
    ///   `TokenKind::Colon` item,
    /// * the vector is empty if nothing was consumed.
    fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
        let start = self.byte_pos as usize;
        // End position of the path recognized so far.
        let mut pos = start;
        let mut has_ident = false;

        loop {
            // Number of consecutive colons consumed in this iteration.
            let mut nb = 0;
            while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
                self.tokens.next();
                nb += 1;
            }
            // Ident path can start with "::" but if we already have content in the ident path,
            // the "::" is mandatory.
            if has_ident && nb == 0 {
                return vec![(TokenKind::Ident, start, pos)];
            } else if nb != 0 && nb != 2 {
                // Not a path separator: the colons are returned on their own.
                if has_ident {
                    return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
                } else {
                    return vec![(TokenKind::Colon, start, pos + nb)];
                }
            }

            // Only identifiers for which `get_real_ident_class` returns `None` can be part of
            // the path.
            if let Some((None, text)) = self.tokens.peek().map(|(token, text)| {
                if *token == TokenKind::Ident {
                    let class = get_real_ident_class(text, true);
                    (class, text)
                } else {
                    // Doesn't matter which Class we put in here...
                    (Some(Class::Comment), text)
                }
            }) {
                // We only "add" the colon if there is an ident behind.
                pos += text.len() + nb;
                has_ident = true;
                self.tokens.next();
            } else if nb > 0 && has_ident {
                return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
            } else if nb > 0 {
                return vec![(TokenKind::Colon, start, start + nb)];
            } else if has_ident {
                return vec![(TokenKind::Ident, start, pos)];
            } else {
                return Vec::new();
            }
        }
    }
588 
589     /// Wraps the tokens iteration to ensure that the `byte_pos` is always correct.
590     ///
591     /// It returns the token's kind, the token as a string and its byte position in the source
592     /// string.
next(&mut self) -> Option<(TokenKind, &'src str, u32)>593     fn next(&mut self) -> Option<(TokenKind, &'src str, u32)> {
594         if let Some((kind, text)) = self.tokens.next() {
595             let before = self.byte_pos;
596             self.byte_pos += text.len() as u32;
597             Some((kind, text, before))
598         } else {
599             None
600         }
601     }
602 
603     /// Exhausts the `Classifier` writing the output into `sink`.
604     ///
605     /// The general structure for this method is to iterate over each token,
606     /// possibly giving it an HTML span with a class specifying what flavor of
607     /// token is used.
highlight(mut self, sink: &mut dyn FnMut(Highlight<'src>))608     fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'src>)) {
609         loop {
610             if let Some(decs) = self.decorations.as_mut() {
611                 let byte_pos = self.byte_pos;
612                 let n_starts = decs.starts.iter().filter(|(i, _)| byte_pos >= *i).count();
613                 for (_, kind) in decs.starts.drain(0..n_starts) {
614                     sink(Highlight::EnterSpan { class: Class::Decoration(kind) });
615                 }
616 
617                 let n_ends = decs.ends.iter().filter(|i| byte_pos >= **i).count();
618                 for _ in decs.ends.drain(0..n_ends) {
619                     sink(Highlight::ExitSpan);
620                 }
621             }
622 
623             if self
624                 .tokens
625                 .peek()
626                 .map(|t| matches!(t.0, TokenKind::Colon | TokenKind::Ident))
627                 .unwrap_or(false)
628             {
629                 let tokens = self.get_full_ident_path();
630                 for (token, start, end) in &tokens {
631                     let text = &self.src[*start..*end];
632                     self.advance(*token, text, sink, *start as u32);
633                     self.byte_pos += text.len() as u32;
634                 }
635                 if !tokens.is_empty() {
636                     continue;
637                 }
638             }
639             if let Some((token, text, before)) = self.next() {
640                 self.advance(token, text, sink, before);
641             } else {
642                 break;
643             }
644         }
645     }
646 
647     /// Single step of highlighting. This will classify `token`, but maybe also a couple of
648     /// following ones as well.
649     ///
650     /// `before` is the position of the given token in the `source` string and is used as "lo" byte
651     /// in case we want to try to generate a link for this token using the
652     /// `span_correspondence_map`.
advance( &mut self, token: TokenKind, text: &'src str, sink: &mut dyn FnMut(Highlight<'src>), before: u32, )653     fn advance(
654         &mut self,
655         token: TokenKind,
656         text: &'src str,
657         sink: &mut dyn FnMut(Highlight<'src>),
658         before: u32,
659     ) {
660         let lookahead = self.peek();
661         let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
662         let class = match token {
663             TokenKind::Whitespace => return no_highlight(sink),
664             TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
665                 if doc_style.is_some() {
666                     Class::DocComment
667                 } else {
668                     Class::Comment
669                 }
670             }
671             // Consider this as part of a macro invocation if there was a
672             // leading identifier.
673             TokenKind::Bang if self.in_macro => {
674                 self.in_macro = false;
675                 sink(Highlight::Token { text, class: None });
676                 sink(Highlight::ExitSpan);
677                 return;
678             }
679 
680             // Assume that '&' or '*' is the reference or dereference operator
681             // or a reference or pointer type. Unless, of course, it looks like
682             // a logical and or a multiplication operator: `&&` or `* `.
683             TokenKind::Star => match self.tokens.peek() {
684                 Some((TokenKind::Whitespace, _)) => return no_highlight(sink),
685                 Some((TokenKind::Ident, "mut")) => {
686                     self.next();
687                     sink(Highlight::Token { text: "*mut", class: Some(Class::RefKeyWord) });
688                     return;
689                 }
690                 Some((TokenKind::Ident, "const")) => {
691                     self.next();
692                     sink(Highlight::Token { text: "*const", class: Some(Class::RefKeyWord) });
693                     return;
694                 }
695                 _ => Class::RefKeyWord,
696             },
697             TokenKind::And => match self.tokens.peek() {
698                 Some((TokenKind::And, _)) => {
699                     self.next();
700                     sink(Highlight::Token { text: "&&", class: None });
701                     return;
702                 }
703                 Some((TokenKind::Eq, _)) => {
704                     self.next();
705                     sink(Highlight::Token { text: "&=", class: None });
706                     return;
707                 }
708                 Some((TokenKind::Whitespace, _)) => return no_highlight(sink),
709                 Some((TokenKind::Ident, "mut")) => {
710                     self.next();
711                     sink(Highlight::Token { text: "&mut", class: Some(Class::RefKeyWord) });
712                     return;
713                 }
714                 _ => Class::RefKeyWord,
715             },
716 
717             // These can either be operators, or arrows.
718             TokenKind::Eq => match lookahead {
719                 Some(TokenKind::Eq) => {
720                     self.next();
721                     sink(Highlight::Token { text: "==", class: None });
722                     return;
723                 }
724                 Some(TokenKind::Gt) => {
725                     self.next();
726                     sink(Highlight::Token { text: "=>", class: None });
727                     return;
728                 }
729                 _ => return no_highlight(sink),
730             },
731             TokenKind::Minus if lookahead == Some(TokenKind::Gt) => {
732                 self.next();
733                 sink(Highlight::Token { text: "->", class: None });
734                 return;
735             }
736 
737             // Other operators.
738             TokenKind::Minus
739             | TokenKind::Plus
740             | TokenKind::Or
741             | TokenKind::Slash
742             | TokenKind::Caret
743             | TokenKind::Percent
744             | TokenKind::Bang
745             | TokenKind::Lt
746             | TokenKind::Gt => return no_highlight(sink),
747 
748             // Miscellaneous, no highlighting.
749             TokenKind::Dot
750             | TokenKind::Semi
751             | TokenKind::Comma
752             | TokenKind::OpenParen
753             | TokenKind::CloseParen
754             | TokenKind::OpenBrace
755             | TokenKind::CloseBrace
756             | TokenKind::OpenBracket
757             | TokenKind::At
758             | TokenKind::Tilde
759             | TokenKind::Colon
760             | TokenKind::Unknown => return no_highlight(sink),
761 
762             TokenKind::Question => Class::QuestionMark,
763 
764             TokenKind::Dollar => match lookahead {
765                 Some(TokenKind::Ident) => {
766                     self.in_macro_nonterminal = true;
767                     Class::MacroNonTerminal
768                 }
769                 _ => return no_highlight(sink),
770             },
771 
772             // This might be the start of an attribute. We're going to want to
773             // continue highlighting it as an attribute until the ending ']' is
774             // seen, so skip out early. Down below we terminate the attribute
775             // span when we see the ']'.
776             TokenKind::Pound => {
777                 match lookahead {
778                     // Case 1: #![inner_attribute]
779                     Some(TokenKind::Bang) => {
780                         self.next();
781                         if let Some(TokenKind::OpenBracket) = self.peek() {
782                             self.in_attribute = true;
783                             sink(Highlight::EnterSpan { class: Class::Attribute });
784                         }
785                         sink(Highlight::Token { text: "#", class: None });
786                         sink(Highlight::Token { text: "!", class: None });
787                         return;
788                     }
789                     // Case 2: #[outer_attribute]
790                     Some(TokenKind::OpenBracket) => {
791                         self.in_attribute = true;
792                         sink(Highlight::EnterSpan { class: Class::Attribute });
793                     }
794                     _ => (),
795                 }
796                 return no_highlight(sink);
797             }
798             TokenKind::CloseBracket => {
799                 if self.in_attribute {
800                     self.in_attribute = false;
801                     sink(Highlight::Token { text: "]", class: None });
802                     sink(Highlight::ExitSpan);
803                     return;
804                 }
805                 return no_highlight(sink);
806             }
807             TokenKind::Literal { kind, .. } => match kind {
808                 // Text literals.
809                 LiteralKind::Byte { .. }
810                 | LiteralKind::Char { .. }
811                 | LiteralKind::Str { .. }
812                 | LiteralKind::ByteStr { .. }
813                 | LiteralKind::RawStr { .. }
814                 | LiteralKind::RawByteStr { .. }
815                 | LiteralKind::CStr { .. }
816                 | LiteralKind::RawCStr { .. } => Class::String,
817                 // Number literals.
818                 LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
819             },
820             TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
821                 self.in_macro = true;
822                 sink(Highlight::EnterSpan { class: Class::Macro(self.new_span(before, text)) });
823                 sink(Highlight::Token { text, class: None });
824                 return;
825             }
826             TokenKind::Ident => match get_real_ident_class(text, false) {
827                 None => match text {
828                     "Option" | "Result" => Class::PreludeTy,
829                     "Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
830                     // "union" is a weak keyword and is only considered as a keyword when declaring
831                     // a union type.
832                     "union" if self.check_if_is_union_keyword() => Class::KeyWord,
833                     _ if self.in_macro_nonterminal => {
834                         self.in_macro_nonterminal = false;
835                         Class::MacroNonTerminal
836                     }
837                     "self" | "Self" => Class::Self_(self.new_span(before, text)),
838                     _ => Class::Ident(self.new_span(before, text)),
839                 },
840                 Some(c) => c,
841             },
842             TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
843                 Class::Ident(self.new_span(before, text))
844             }
845             TokenKind::Lifetime { .. } => Class::Lifetime,
846             TokenKind::Eof => panic!("Eof in advance"),
847         };
848         // Anything that didn't return above is the simple case where the class
849         // just spans a single token, so it can be emitted as one `Highlight::Token`.
850         sink(Highlight::Token { text, class: Some(class) });
851     }
852 
peek(&mut self) -> Option<TokenKind>853     fn peek(&mut self) -> Option<TokenKind> {
854         self.tokens.peek().map(|(token_kind, _text)| *token_kind)
855     }
856 
check_if_is_union_keyword(&mut self) -> bool857     fn check_if_is_union_keyword(&mut self) -> bool {
858         while let Some(kind) = self.tokens.peek_next().map(|(token_kind, _text)| token_kind) {
859             if *kind == TokenKind::Whitespace {
860                 continue;
861             }
862             return *kind == TokenKind::Ident;
863         }
864         false
865     }
866 }
867 
868 /// Called when we start processing a span of text that should be highlighted.
869 /// The `Class` argument specifies how it should be highlighted.
enter_span( out: &mut impl Write, klass: Class, href_context: &Option<HrefContext<'_, '_>>, ) -> &'static str870 fn enter_span(
871     out: &mut impl Write,
872     klass: Class,
873     href_context: &Option<HrefContext<'_, '_>>,
874 ) -> &'static str {
875     string_without_closing_tag(out, "", Some(klass), href_context, true).expect(
876         "internal error: enter_span was called with Some(klass) but did not return a \
877             closing HTML tag",
878     )
879 }
880 
/// Closes a highlighted region by writing `closing_tag` (as returned by
/// `enter_span`) verbatim to `out`.
///
/// # Panics
///
/// Panics if writing to `out` fails.
fn exit_span(out: &mut impl Write, closing_tag: &str) {
    let written = out.write_str(closing_tag);
    written.unwrap();
}
885 
886 /// Called for a span of text. If the text should be highlighted differently
887 /// from the surrounding text, then the `Class` argument will be a value other
888 /// than `None`.
889 ///
890 /// The following sequences of callbacks are equivalent:
891 /// ```plain
892 ///     enter_span(Foo), string("text", None), exit_span()
893 ///     string("text", Foo)
894 /// ```
895 ///
896 /// The latter can be thought of as a shorthand for the former, which is more
897 /// flexible.
898 ///
899 /// Note that if `context` is not `None` and that the given `klass` contains a `Span`, the function
900 /// will then try to find this `span` in the `span_correspondence_map`. If found, it'll then
901 /// generate a link for this element (which corresponds to where its definition is located).
string<T: Display>( out: &mut impl Write, text: T, klass: Option<Class>, href_context: &Option<HrefContext<'_, '_>>, open_tag: bool, )902 fn string<T: Display>(
903     out: &mut impl Write,
904     text: T,
905     klass: Option<Class>,
906     href_context: &Option<HrefContext<'_, '_>>,
907     open_tag: bool,
908 ) {
909     if let Some(closing_tag) = string_without_closing_tag(out, text, klass, href_context, open_tag)
910     {
911         out.write_str(closing_tag).unwrap();
912     }
913 }
914 
915 /// This function writes `text` into `out` with some modifications depending on `klass`:
916 ///
917 /// * If `klass` is `None`, `text` is written into `out` with no modification.
918 /// * If `klass` is `Some` but `klass.get_span()` is `None`, it writes the text wrapped in a
919 ///   `<span>` with the provided `klass`.
920 /// * If `klass` is `Some` and has a [`rustc_span::Span`], it then tries to generate a link (`<a>`
921 ///   element) by retrieving the link information from the `span_correspondence_map` that was filled
922 ///   in `span_map.rs::collect_spans_and_sources`. If it cannot retrieve the information, then it's
923 ///   the same as the second point (`klass` is `Some` but doesn't have a [`rustc_span::Span`]).
string_without_closing_tag<T: Display>( out: &mut impl Write, text: T, klass: Option<Class>, href_context: &Option<HrefContext<'_, '_>>, open_tag: bool, ) -> Option<&'static str>924 fn string_without_closing_tag<T: Display>(
925     out: &mut impl Write,
926     text: T,
927     klass: Option<Class>,
928     href_context: &Option<HrefContext<'_, '_>>,
929     open_tag: bool,
930 ) -> Option<&'static str> {
931     let Some(klass) = klass
932     else {
933         write!(out, "{}", text).unwrap();
934         return None;
935     };
936     let Some(def_span) = klass.get_span()
937     else {
938         if !open_tag {
939             write!(out, "{}", text).unwrap();
940             return None;
941         }
942         write!(out, "<span class=\"{}\">{}", klass.as_html(), text).unwrap();
943         return Some("</span>");
944     };
945 
946     let mut text_s = text.to_string();
947     if text_s.contains("::") {
948         text_s = text_s.split("::").intersperse("::").fold(String::new(), |mut path, t| {
949             match t {
950                 "self" | "Self" => write!(
951                     &mut path,
952                     "<span class=\"{}\">{}</span>",
953                     Class::Self_(DUMMY_SP).as_html(),
954                     t
955                 ),
956                 "crate" | "super" => {
957                     write!(&mut path, "<span class=\"{}\">{}</span>", Class::KeyWord.as_html(), t)
958                 }
959                 t => write!(&mut path, "{}", t),
960             }
961             .expect("Failed to build source HTML path");
962             path
963         });
964     }
965 
966     if let Some(href_context) = href_context {
967         if let Some(href) =
968             href_context.context.shared.span_correspondence_map.get(&def_span).and_then(|href| {
969                 let context = href_context.context;
970                 // FIXME: later on, it'd be nice to provide two links (if possible) for all items:
971                 // one to the documentation page and one to the source definition.
972                 // FIXME: currently, external items only generate a link to their documentation,
973                 // a link to their definition can be generated using this:
974                 // https://github.com/rust-lang/rust/blob/60f1a2fc4b535ead9c85ce085fdce49b1b097531/src/librustdoc/html/render/context.rs#L315-L338
975                 match href {
976                     LinkFromSrc::Local(span) => {
977                         context.href_from_span_relative(*span, &href_context.current_href)
978                     }
979                     LinkFromSrc::External(def_id) => {
980                         format::href_with_root_path(*def_id, context, Some(href_context.root_path))
981                             .ok()
982                             .map(|(url, _, _)| url)
983                     }
984                     LinkFromSrc::Primitive(prim) => format::href_with_root_path(
985                         PrimitiveType::primitive_locations(context.tcx())[prim],
986                         context,
987                         Some(href_context.root_path),
988                     )
989                     .ok()
990                     .map(|(url, _, _)| url),
991                 }
992             })
993         {
994             if !open_tag {
995                 // We're already inside an element which has the same klass, no need to give it
996                 // again.
997                 write!(out, "<a href=\"{}\">{}", href, text_s).unwrap();
998             } else {
999                 let klass_s = klass.as_html();
1000                 if klass_s.is_empty() {
1001                     write!(out, "<a href=\"{}\">{}", href, text_s).unwrap();
1002                 } else {
1003                     write!(out, "<a class=\"{}\" href=\"{}\">{}", klass_s, href, text_s).unwrap();
1004                 }
1005             }
1006             return Some("</a>");
1007         }
1008     }
1009     if !open_tag {
1010         write!(out, "{}", text_s).unwrap();
1011         return None;
1012     }
1013     let klass_s = klass.as_html();
1014     if klass_s.is_empty() {
1015         out.write_str(&text_s).unwrap();
1016         Some("")
1017     } else {
1018         write!(out, "<span class=\"{}\">{}", klass_s, text_s).unwrap();
1019         Some("</span>")
1020     }
1021 }
1022 
1023 #[cfg(test)]
1024 mod tests;
1025