1 use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag};
2 use rustc_ast as ast;
3 use rustc_ast::util::comments::beautify_doc_string;
4 use rustc_data_structures::fx::FxHashMap;
5 use rustc_span::def_id::DefId;
6 use rustc_span::symbol::{kw, sym, Symbol};
7 use rustc_span::Span;
8 use std::{cmp, mem};
9
/// The syntactic form a [`DocFragment`] was written in.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DocFragmentKind {
    /// A doc fragment created from a `///` or `//!` doc comment.
    SugaredDoc,
    /// A doc fragment created from a "raw" `#[doc=""]` attribute.
    RawDoc,
}
17
/// A portion of documentation, extracted from a `#[doc]` attribute.
///
/// Each fragment records the `Span` where the corresponding doc comment or attribute is
/// located.
///
/// Included files are kept separate from inline doc comments so that proper line-number
/// information can be given when a doctest fails. Sugared doc comments and "raw" doc comments are
/// kept separate because of issue #42760.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DocFragment {
    /// Location of the doc comment or `#[doc]` attribute this fragment was created from.
    pub span: Span,
    /// The item this doc-comment came from.
    /// Used to determine the scope in which doc links in this fragment are resolved.
    /// Typically filled for reexport docs when they are merged into the docs of the
    /// original reexported item.
    /// If the id is not filled, which happens for the original reexported item, then
    /// it has to be taken from somewhere else during doc link resolution.
    pub item_id: Option<DefId>,
    /// The documentation text of this fragment.
    pub doc: Symbol,
    /// Whether the fragment was written as a sugared doc comment or as a raw `#[doc]` attribute.
    pub kind: DocFragmentKind,
    /// Number of leading bytes that `add_doc_fragment` strips from every non-blank line of
    /// `doc`. Computed by `unindent_doc_fragments`.
    pub indent: usize,
}
40
/// The reasons for which [`strip_generics_from_path`] can reject a doc link path.
#[derive(Clone, Copy, Debug)]
pub enum MalformedGenerics {
    /// This link has unbalanced angle brackets.
    ///
    /// For example, `Vec<T` should trigger this, as should `Vec<T>>`.
    UnbalancedAngleBrackets,
    /// The generics are not attached to a type.
    ///
    /// For example, `<T>` should trigger this.
    ///
    /// This is detected by checking if the path is empty after the generics are stripped.
    MissingType,
    /// The link uses fully-qualified syntax, which is currently unsupported.
    ///
    /// For example, `<Vec as IntoIterator>::into_iter` should trigger this.
    ///
    /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside
    /// angle brackets.
    HasFullyQualifiedSyntax,
    /// The link has an invalid path separator.
    ///
    /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not**
    /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be
    /// called.
    ///
    /// Note that this will also **not** be triggered if the invalid path separator is inside angle
    /// brackets because rustdoc mostly ignores what's inside angle brackets (except for
    /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
    ///
    /// This is detected by checking if there is a colon followed by a non-colon in the link.
    InvalidPathSeparator,
    /// The link has too many angle brackets.
    ///
    /// For example, `Vec<<T>>` should trigger this.
    TooManyAngleBrackets,
    /// The link has empty angle brackets.
    ///
    /// For example, `Vec<>` should trigger this.
    EmptyAngleBrackets,
}
81
82 /// Removes excess indentation on comments in order for the Markdown
83 /// to be parsed correctly. This is necessary because the convention for
84 /// writing documentation is to provide a space between the /// or //! marker
85 /// and the doc text, but Markdown is whitespace-sensitive. For example,
86 /// a block of text with four-space indentation is parsed as a code block,
87 /// so if we didn't unindent comments, these list items
88 ///
89 /// /// A list:
90 /// ///
91 /// /// - Foo
92 /// /// - Bar
93 ///
94 /// would be parsed as if they were in a code block, which is likely not what the user intended.
unindent_doc_fragments(docs: &mut [DocFragment])95 pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
96 // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
97 // fragments kind's lines are never starting with a whitespace unless they are using some
98 // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
99 // we need to take into account the fact that the minimum indent minus one (to take this
100 // whitespace into account).
101 //
102 // For example:
103 //
104 // /// hello!
105 // #[doc = "another"]
106 //
107 // In this case, you want "hello! another" and not "hello! another".
108 let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
109 && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
110 {
111 // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
112 // "decide" how much the minimum indent will be.
113 1
114 } else {
115 0
116 };
117
118 // `min_indent` is used to know how much whitespaces from the start of each lines must be
119 // removed. Example:
120 //
121 // /// hello!
122 // #[doc = "another"]
123 //
124 // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
125 // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
126 // (5 - 1) whitespaces.
127 let Some(min_indent) = docs
128 .iter()
129 .map(|fragment| {
130 fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
131 if line.chars().all(|c| c.is_whitespace()) {
132 min_indent
133 } else {
134 // Compare against either space or tab, ignoring whether they are
135 // mixed or not.
136 let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
137 cmp::min(min_indent, whitespace)
138 + if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
139 }
140 })
141 })
142 .min()
143 else {
144 return;
145 };
146
147 for fragment in docs {
148 if fragment.doc == kw::Empty {
149 continue;
150 }
151
152 let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
153 min_indent - add
154 } else {
155 min_indent
156 };
157
158 fragment.indent = min_indent;
159 }
160 }
161
162 /// The goal of this function is to apply the `DocFragment` transformation that is required when
163 /// transforming into the final Markdown, which is applying the computed indent to each line in
164 /// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`).
165 ///
166 /// Note: remove the trailing newline where appropriate
add_doc_fragment(out: &mut String, frag: &DocFragment)167 pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
168 let s = frag.doc.as_str();
169 let mut iter = s.lines();
170 if s.is_empty() {
171 out.push('\n');
172 return;
173 }
174 while let Some(line) = iter.next() {
175 if line.chars().any(|c| !c.is_whitespace()) {
176 assert!(line.len() >= frag.indent);
177 out.push_str(&line[frag.indent..]);
178 } else {
179 out.push_str(line);
180 }
181 out.push('\n');
182 }
183 }
184
attrs_to_doc_fragments<'a>( attrs: impl Iterator<Item = (&'a ast::Attribute, Option<DefId>)>, doc_only: bool, ) -> (Vec<DocFragment>, ast::AttrVec)185 pub fn attrs_to_doc_fragments<'a>(
186 attrs: impl Iterator<Item = (&'a ast::Attribute, Option<DefId>)>,
187 doc_only: bool,
188 ) -> (Vec<DocFragment>, ast::AttrVec) {
189 let mut doc_fragments = Vec::new();
190 let mut other_attrs = ast::AttrVec::new();
191 for (attr, item_id) in attrs {
192 if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
193 let doc = beautify_doc_string(doc_str, comment_kind);
194 let kind = if attr.is_doc_comment() {
195 DocFragmentKind::SugaredDoc
196 } else {
197 DocFragmentKind::RawDoc
198 };
199 let fragment = DocFragment { span: attr.span, doc, kind, item_id, indent: 0 };
200 doc_fragments.push(fragment);
201 } else if !doc_only {
202 other_attrs.push(attr.clone());
203 }
204 }
205
206 unindent_doc_fragments(&mut doc_fragments);
207
208 (doc_fragments, other_attrs)
209 }
210
211 /// Return the doc-comments on this item, grouped by the module they came from.
212 /// The module can be different if this is a re-export with added documentation.
213 ///
214 /// The last newline is not trimmed so the produced strings are reusable between
215 /// early and late doc link resolution regardless of their position.
prepare_to_doc_link_resolution( doc_fragments: &[DocFragment], ) -> FxHashMap<Option<DefId>, String>216 pub fn prepare_to_doc_link_resolution(
217 doc_fragments: &[DocFragment],
218 ) -> FxHashMap<Option<DefId>, String> {
219 let mut res = FxHashMap::default();
220 for fragment in doc_fragments {
221 let out_str = res.entry(fragment.item_id).or_default();
222 add_doc_fragment(out_str, fragment);
223 }
224 res
225 }
226
227 /// Options for rendering Markdown in the main body of documentation.
main_body_opts() -> Options228 pub fn main_body_opts() -> Options {
229 Options::ENABLE_TABLES
230 | Options::ENABLE_FOOTNOTES
231 | Options::ENABLE_STRIKETHROUGH
232 | Options::ENABLE_TASKLISTS
233 | Options::ENABLE_SMART_PUNCTUATION
234 }
235
strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics>236 fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
237 let mut stripped_segment = String::new();
238 let mut param_depth = 0;
239
240 let mut latest_generics_chunk = String::new();
241
242 for c in segment {
243 if c == '<' {
244 param_depth += 1;
245 latest_generics_chunk.clear();
246 } else if c == '>' {
247 param_depth -= 1;
248 if latest_generics_chunk.contains(" as ") {
249 // The segment tries to use fully-qualified syntax, which is currently unsupported.
250 // Give a helpful error message instead of completely ignoring the angle brackets.
251 return Err(MalformedGenerics::HasFullyQualifiedSyntax);
252 }
253 } else {
254 if param_depth == 0 {
255 stripped_segment.push(c);
256 } else {
257 latest_generics_chunk.push(c);
258 }
259 }
260 }
261
262 if param_depth == 0 {
263 Ok(stripped_segment)
264 } else {
265 // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
266 Err(MalformedGenerics::UnbalancedAngleBrackets)
267 }
268 }
269
strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics>270 pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
271 if !path_str.contains(['<', '>']) {
272 return Ok(path_str.into());
273 }
274 let mut stripped_segments = vec![];
275 let mut path = path_str.chars().peekable();
276 let mut segment = Vec::new();
277
278 while let Some(chr) = path.next() {
279 match chr {
280 ':' => {
281 if path.next_if_eq(&':').is_some() {
282 let stripped_segment =
283 strip_generics_from_path_segment(mem::take(&mut segment))?;
284 if !stripped_segment.is_empty() {
285 stripped_segments.push(stripped_segment);
286 }
287 } else {
288 return Err(MalformedGenerics::InvalidPathSeparator);
289 }
290 }
291 '<' => {
292 segment.push(chr);
293
294 match path.next() {
295 Some('<') => {
296 return Err(MalformedGenerics::TooManyAngleBrackets);
297 }
298 Some('>') => {
299 return Err(MalformedGenerics::EmptyAngleBrackets);
300 }
301 Some(chr) => {
302 segment.push(chr);
303
304 while let Some(chr) = path.next_if(|c| *c != '>') {
305 segment.push(chr);
306 }
307 }
308 None => break,
309 }
310 }
311 _ => segment.push(chr),
312 }
313 trace!("raw segment: {:?}", segment);
314 }
315
316 if !segment.is_empty() {
317 let stripped_segment = strip_generics_from_path_segment(segment)?;
318 if !stripped_segment.is_empty() {
319 stripped_segments.push(stripped_segment);
320 }
321 }
322
323 debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments);
324
325 let stripped_path = stripped_segments.join("::");
326
327 if !stripped_path.is_empty() {
328 Ok(stripped_path.into())
329 } else {
330 Err(MalformedGenerics::MissingType)
331 }
332 }
333
334 /// Returns whether the first doc-comment is an inner attribute.
335 ///
336 //// If there are no doc-comments, return true.
337 /// FIXME(#78591): Support both inner and outer attributes on the same item.
inner_docs(attrs: &[ast::Attribute]) -> bool338 pub fn inner_docs(attrs: &[ast::Attribute]) -> bool {
339 attrs.iter().find(|a| a.doc_str().is_some()).map_or(true, |a| a.style == ast::AttrStyle::Inner)
340 }
341
342 /// Has `#[rustc_doc_primitive]` or `#[doc(keyword)]`.
has_primitive_or_keyword_docs(attrs: &[ast::Attribute]) -> bool343 pub fn has_primitive_or_keyword_docs(attrs: &[ast::Attribute]) -> bool {
344 for attr in attrs {
345 if attr.has_name(sym::rustc_doc_primitive) {
346 return true;
347 } else if attr.has_name(sym::doc) && let Some(items) = attr.meta_item_list() {
348 for item in items {
349 if item.has_name(sym::keyword) {
350 return true;
351 }
352 }
353 }
354 }
355 false
356 }
357
358 /// Simplified version of the corresponding function in rustdoc.
359 /// If the rustdoc version returns a successful result, this function must return the same result.
360 /// Otherwise this function may return anything.
preprocess_link(link: &str) -> Box<str>361 fn preprocess_link(link: &str) -> Box<str> {
362 let link = link.replace('`', "");
363 let link = link.split('#').next().unwrap();
364 let link = link.trim();
365 let link = link.rsplit('@').next().unwrap();
366 let link = link.strip_suffix("()").unwrap_or(link);
367 let link = link.strip_suffix("{}").unwrap_or(link);
368 let link = link.strip_suffix("[]").unwrap_or(link);
369 let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
370 let link = link.trim();
371 strip_generics_from_path(link).unwrap_or_else(|_| link.into())
372 }
373
374 /// Keep inline and reference links `[]`,
375 /// but skip autolinks `<>` which we never consider to be intra-doc links.
may_be_doc_link(link_type: LinkType) -> bool376 pub fn may_be_doc_link(link_type: LinkType) -> bool {
377 match link_type {
378 LinkType::Inline
379 | LinkType::Reference
380 | LinkType::ReferenceUnknown
381 | LinkType::Collapsed
382 | LinkType::CollapsedUnknown
383 | LinkType::Shortcut
384 | LinkType::ShortcutUnknown => true,
385 LinkType::Autolink | LinkType::Email => false,
386 }
387 }
388
389 /// Simplified version of `preprocessed_markdown_links` from rustdoc.
390 /// Must return at least the same links as it, but may add some more links on top of that.
attrs_to_preprocessed_links(attrs: &[ast::Attribute]) -> Vec<Box<str>>391 pub(crate) fn attrs_to_preprocessed_links(attrs: &[ast::Attribute]) -> Vec<Box<str>> {
392 let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
393 let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
394
395 Parser::new_with_broken_link_callback(
396 &doc,
397 main_body_opts(),
398 Some(&mut |link: BrokenLink<'_>| Some((link.reference, "".into()))),
399 )
400 .filter_map(|event| match event {
401 Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
402 Some(preprocess_link(&dest))
403 }
404 _ => None,
405 })
406 .collect()
407 }
408