• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Formatting and tools for comments.
2 
3 use std::{self, borrow::Cow, iter};
4 
5 use itertools::{multipeek, MultiPeek};
6 use lazy_static::lazy_static;
7 use regex::Regex;
8 use rustc_span::Span;
9 
10 use crate::config::Config;
11 use crate::rewrite::RewriteContext;
12 use crate::shape::{Indent, Shape};
13 use crate::string::{rewrite_string, StringFormat};
14 use crate::utils::{
15     count_newlines, first_line_width, last_line_width, trim_left_preserve_layout,
16     trimmed_last_line_width, unicode_str_width,
17 };
18 use crate::{ErrorKind, FormattingError};
19 
lazy_static! {
    /// A regex matching reference doc links.
    ///
    /// ```markdown
    /// /// An [example].
    /// ///
    /// /// [example]: this::is::a::link
    /// ```
    ///
    /// The pattern is anchored at the start of the input: a non-empty bracketed label, at most
    /// one whitespace character, then a colon.
    static ref REFERENCE_LINK_URL: Regex = Regex::new(r"^\[.+\]\s?:").unwrap();
}
30 
/// Returns `true` if `comment` starts with `//` immediately followed by a character that is
/// neither alphanumeric nor whitespace (for example `//@` or `//-`).
fn is_custom_comment(comment: &str) -> bool {
    // The character right after the leading `//`, if there is one.
    let marker = comment
        .strip_prefix("//")
        .and_then(|rest| rest.chars().next());
    match marker {
        Some(c) => !(c.is_alphanumeric() || c.is_whitespace()),
        None => false,
    }
}
40 
/// The kind of comment, which determines the opener, closer and per-line prefix used when the
/// comment is written out.
#[derive(Copy, Clone, PartialEq, Eq)]
pub(crate) enum CommentStyle<'a> {
    /// Line comment written with `// `.
    DoubleSlash,
    /// Line doc comment written with `/// `.
    TripleSlash,
    /// Line doc comment written with `//! `.
    Doc,
    /// Block comment opened with `/* `.
    SingleBullet,
    /// Block comment opened with `/** `.
    DoubleBullet,
    /// Block comment opened with `/*! `.
    Exclamation,
    /// Line comment with a custom opener; the opener text is carried in the variant.
    Custom(&'a str),
}
51 
/// Extracts the opener of a custom comment: the first line of `s` up to and including its first
/// space, or the whole first line if it contains no space. Returns `""` when `s` has no lines.
fn custom_opener(s: &str) -> &str {
    let first_line = match s.lines().next() {
        Some(line) => line,
        None => return "",
    };
    match first_line.find(' ') {
        Some(space_index) => &first_line[..=space_index],
        None => first_line,
    }
}
59 
60 impl<'a> CommentStyle<'a> {
61     /// Returns `true` if the commenting style covers a line only.
is_line_comment(&self) -> bool62     pub(crate) fn is_line_comment(&self) -> bool {
63         match *self {
64             CommentStyle::DoubleSlash
65             | CommentStyle::TripleSlash
66             | CommentStyle::Doc
67             | CommentStyle::Custom(_) => true,
68             _ => false,
69         }
70     }
71 
72     /// Returns `true` if the commenting style can span over multiple lines.
is_block_comment(&self) -> bool73     pub(crate) fn is_block_comment(&self) -> bool {
74         match *self {
75             CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
76                 true
77             }
78             _ => false,
79         }
80     }
81 
82     /// Returns `true` if the commenting style is for documentation.
is_doc_comment(&self) -> bool83     pub(crate) fn is_doc_comment(&self) -> bool {
84         matches!(*self, CommentStyle::TripleSlash | CommentStyle::Doc)
85     }
86 
opener(&self) -> &'a str87     pub(crate) fn opener(&self) -> &'a str {
88         match *self {
89             CommentStyle::DoubleSlash => "// ",
90             CommentStyle::TripleSlash => "/// ",
91             CommentStyle::Doc => "//! ",
92             CommentStyle::SingleBullet => "/* ",
93             CommentStyle::DoubleBullet => "/** ",
94             CommentStyle::Exclamation => "/*! ",
95             CommentStyle::Custom(opener) => opener,
96         }
97     }
98 
closer(&self) -> &'a str99     pub(crate) fn closer(&self) -> &'a str {
100         match *self {
101             CommentStyle::DoubleSlash
102             | CommentStyle::TripleSlash
103             | CommentStyle::Custom(..)
104             | CommentStyle::Doc => "",
105             CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
106                 " */"
107             }
108         }
109     }
110 
line_start(&self) -> &'a str111     pub(crate) fn line_start(&self) -> &'a str {
112         match *self {
113             CommentStyle::DoubleSlash => "// ",
114             CommentStyle::TripleSlash => "/// ",
115             CommentStyle::Doc => "//! ",
116             CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
117                 " * "
118             }
119             CommentStyle::Custom(opener) => opener,
120         }
121     }
122 
to_str_tuplet(&self) -> (&'a str, &'a str, &'a str)123     pub(crate) fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
124         (self.opener(), self.closer(), self.line_start())
125     }
126 }
127 
comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_>128 pub(crate) fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_> {
129     if !normalize_comments {
130         if orig.starts_with("/**") && !orig.starts_with("/**/") {
131             CommentStyle::DoubleBullet
132         } else if orig.starts_with("/*!") {
133             CommentStyle::Exclamation
134         } else if orig.starts_with("/*") {
135             CommentStyle::SingleBullet
136         } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
137             CommentStyle::TripleSlash
138         } else if orig.starts_with("//!") {
139             CommentStyle::Doc
140         } else if is_custom_comment(orig) {
141             CommentStyle::Custom(custom_opener(orig))
142         } else {
143             CommentStyle::DoubleSlash
144         }
145     } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
146         || (orig.starts_with("/**") && !orig.starts_with("/**/"))
147     {
148         CommentStyle::TripleSlash
149     } else if orig.starts_with("//!") || orig.starts_with("/*!") {
150         CommentStyle::Doc
151     } else if is_custom_comment(orig) {
152         CommentStyle::Custom(custom_opener(orig))
153     } else {
154         CommentStyle::DoubleSlash
155     }
156 }
157 
/// Returns `true` if `s`, ignoring trailing whitespace, ends with the block-comment closer `*/`.
pub(crate) fn is_last_comment_block(s: &str) -> bool {
    let without_trailing_ws = s.trim_end();
    without_trailing_ws.strip_suffix("*/").is_some()
}
162 
163 /// Combine `prev_str` and `next_str` into a single `String`. `span` may contain
164 /// comments between two strings. If there are such comments, then that will be
165 /// recovered. If `allow_extend` is true and there is no comment between the two
166 /// strings, then they will be put on a single line as long as doing so does not
167 /// exceed max width.
combine_strs_with_missing_comments( context: &RewriteContext<'_>, prev_str: &str, next_str: &str, span: Span, shape: Shape, allow_extend: bool, ) -> Option<String>168 pub(crate) fn combine_strs_with_missing_comments(
169     context: &RewriteContext<'_>,
170     prev_str: &str,
171     next_str: &str,
172     span: Span,
173     shape: Shape,
174     allow_extend: bool,
175 ) -> Option<String> {
176     trace!(
177         "combine_strs_with_missing_comments `{}` `{}` {:?} {:?}",
178         prev_str,
179         next_str,
180         span,
181         shape
182     );
183 
184     let mut result =
185         String::with_capacity(prev_str.len() + next_str.len() + shape.indent.width() + 128);
186     result.push_str(prev_str);
187     let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
188     let first_sep =
189         if prev_str.is_empty() || next_str.is_empty() || trimmed_last_line_width(prev_str) == 0 {
190             ""
191         } else {
192             " "
193         };
194     let mut one_line_width =
195         last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();
196 
197     let config = context.config;
198     let indent = shape.indent;
199     let missing_comment = rewrite_missing_comment(span, shape, context)?;
200 
201     if missing_comment.is_empty() {
202         if allow_extend && one_line_width <= shape.width {
203             result.push_str(first_sep);
204         } else if !prev_str.is_empty() {
205             result.push_str(&indent.to_string_with_newline(config))
206         }
207         result.push_str(next_str);
208         return Some(result);
209     }
210 
211     // We have a missing comment between the first expression and the second expression.
212 
213     // Peek the the original source code and find out whether there is a newline between the first
214     // expression and the second expression or the missing comment. We will preserve the original
215     // layout whenever possible.
216     let original_snippet = context.snippet(span);
217     let prefer_same_line = if let Some(pos) = original_snippet.find('/') {
218         !original_snippet[..pos].contains('\n')
219     } else {
220         !original_snippet.contains('\n')
221     };
222 
223     one_line_width -= first_sep.len();
224     let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
225         Cow::from("")
226     } else {
227         let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
228         if prefer_same_line && one_line_width <= shape.width {
229             Cow::from(" ")
230         } else {
231             indent.to_string_with_newline(config)
232         }
233     };
234     result.push_str(&first_sep);
235     result.push_str(&missing_comment);
236 
237     let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
238         Cow::from("")
239     } else if missing_comment.starts_with("//") {
240         indent.to_string_with_newline(config)
241     } else {
242         one_line_width += missing_comment.len() + first_sep.len() + 1;
243         allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
244         if prefer_same_line && allow_one_line && one_line_width <= shape.width {
245             Cow::from(" ")
246         } else {
247             indent.to_string_with_newline(config)
248         }
249     };
250     result.push_str(&second_sep);
251     result.push_str(next_str);
252 
253     Some(result)
254 }
255 
rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> Option<String>256 pub(crate) fn rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> Option<String> {
257     identify_comment(orig, false, shape, config, true)
258 }
259 
rewrite_comment( orig: &str, block_style: bool, shape: Shape, config: &Config, ) -> Option<String>260 pub(crate) fn rewrite_comment(
261     orig: &str,
262     block_style: bool,
263     shape: Shape,
264     config: &Config,
265 ) -> Option<String> {
266     identify_comment(orig, block_style, shape, config, false)
267 }
268 
/// Splits `orig` into a first group of comment lines sharing one style and the remainder,
/// rewrites the first group, then recurses on the remainder and joins the two results.
///
/// The style of the first group is detected with `comment_style(orig, false)`. Returns `None`
/// if rewriting any group fails.
fn identify_comment(
    orig: &str,
    block_style: bool,
    shape: Shape,
    config: &Config,
    is_doc_comment: bool,
) -> Option<String> {
    let style = comment_style(orig, false);

    // Computes the byte length of line taking into account a newline if the line is part of a
    // paragraph.
    fn compute_len(orig: &str, line: &str) -> usize {
        if orig.len() > line.len() {
            // Something follows the line: `\r` means a two-byte `\r\n` terminator, anything
            // else is counted as a one-byte terminator.
            if orig.as_bytes()[line.len()] == b'\r' {
                line.len() + 2
            } else {
                line.len() + 1
            }
        } else {
            line.len()
        }
    }

    // Get the first group of line comments having the same commenting style.
    //
    // Returns a tuple with:
    // - a boolean indicating if there is a blank line
    // - a number indicating the size of the first group of comments
    fn consume_same_line_comments(
        style: CommentStyle<'_>,
        orig: &str,
        line_start: &str,
    ) -> (bool, usize) {
        let mut first_group_ending = 0;
        let mut hbl = false;

        for line in orig.lines() {
            let trimmed_line = line.trim_start();
            if trimmed_line.is_empty() {
                // A blank line ends the group and is reported to the caller.
                hbl = true;
                break;
            } else if trimmed_line.starts_with(line_start)
                || comment_style(trimmed_line, false) == style
            {
                first_group_ending += compute_len(&orig[first_group_ending..], line);
            } else {
                // A line of a different style ends the group.
                break;
            }
        }
        (hbl, first_group_ending)
    }

    let (has_bare_lines, first_group_ending) = match style {
        CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
            let line_start = style.line_start().trim_start();
            consume_same_line_comments(style, orig, line_start)
        }
        CommentStyle::Custom(opener) => {
            let trimmed_opener = opener.trim_end();
            consume_same_line_comments(style, orig, trimmed_opener)
        }
        // for a block comment, search for the closing symbol
        CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
            let closer = style.closer().trim_start();
            // Number of closers in all of `orig`; decremented for each line ending with one.
            let mut count = orig.matches(closer).count();
            let mut closing_symbol_offset = 0;
            let mut hbl = false;
            let mut first = true;
            for line in orig.lines() {
                closing_symbol_offset += compute_len(&orig[closing_symbol_offset..], line);
                let mut trimmed_line = line.trim_start();
                // A line that starts with neither `*`, `//` nor `/*` is a "bare" line.
                if !trimmed_line.starts_with('*')
                    && !trimmed_line.starts_with("//")
                    && !trimmed_line.starts_with("/*")
                {
                    hbl = true;
                }

                // Remove opener from consideration when searching for closer
                if first {
                    let opener = style.opener().trim_end();
                    trimmed_line = &trimmed_line[opener.len()..];
                    first = false;
                }
                if trimmed_line.ends_with(closer) {
                    count -= 1;
                    if count == 0 {
                        break;
                    }
                }
            }
            (hbl, closing_symbol_offset)
        }
    };

    let (first_group, rest) = orig.split_at(first_group_ending);
    // Pick the rewrite strategy for the first group based on the configuration.
    let rewritten_first_group =
        if !config.normalize_comments() && has_bare_lines && style.is_block_comment() {
            trim_left_preserve_layout(first_group, shape.indent, config)?
        } else if !config.normalize_comments()
            && !config.wrap_comments()
            && !config.format_code_in_doc_comments()
        {
            light_rewrite_comment(first_group, shape.indent, config, is_doc_comment)
        } else {
            rewrite_comment_inner(
                first_group,
                block_style,
                style,
                shape,
                config,
                is_doc_comment || style.is_doc_comment(),
            )?
        };
    if rest.is_empty() {
        Some(rewritten_first_group)
    } else {
        // Rewrite the remaining comments recursively and append them on a new, indented line.
        identify_comment(
            rest.trim_start(),
            block_style,
            shape,
            config,
            is_doc_comment,
        )
        .map(|rest_str| {
            format!(
                "{}\n{}{}{}",
                rewritten_first_group,
                // insert back the blank line
                if has_bare_lines && style.is_line_comment() {
                    "\n"
                } else {
                    ""
                },
                shape.indent.to_string(config),
                rest_str
            )
        })
    }
}
409 
/// Enum indicating if the code block contains rust based on attributes
enum CodeBlockAttribute {
    Rust,
    NotRust,
}

impl CodeBlockAttribute {
    /// Parses a comma-separated attribute list. Returns `Rust` only if every attribute
    /// (after trimming) is empty or one of the recognized Rust attributes; anything else,
    /// including `ignore`, `compile_fail` and `text`, yields `NotRust`.
    /// See <https://doc.rust-lang.org/rustdoc/print.html#attributes>
    fn new(attributes: &str) -> CodeBlockAttribute {
        let only_rust_attributes = attributes.split(',').map(str::trim).all(|attribute| {
            matches!(
                attribute,
                "" | "rust"
                    | "should_panic"
                    | "no_run"
                    | "edition2015"
                    | "edition2018"
                    | "edition2021"
            )
        });

        if only_rust_attributes {
            CodeBlockAttribute::Rust
        } else {
            CodeBlockAttribute::NotRust
        }
    }
}
432 
/// Block that is formatted as an item.
///
/// An item starts with either a star `*`, a dash `-`, a greater-than `>`, a plus `+`, or a number
/// `12.` or `34)` (with at most 2 digits). An item represents CommonMark's ["list
/// items"](https://spec.commonmark.org/0.30/#list-items) and/or ["block
/// quotes"](https://spec.commonmark.org/0.30/#block-quotes), but note that only a subset of
/// CommonMark is recognized - see the doc comment of [`ItemizedBlock::get_marker_length`] for more
/// details.
///
/// Different levels of indentation are handled by shrinking the shape accordingly.
struct ItemizedBlock {
    /// the lines that are identified as part of an itemized block
    lines: Vec<String>,
    /// the number of characters (typically whitespaces) up to and including the item marker
    indent: usize,
    /// the string that marks the start of an item
    opener: String,
    /// sequence of characters (typically whitespaces) to prefix new lines that are part of the item
    line_start: String,
}
453 
454 impl ItemizedBlock {
455     /// Checks whether the `trimmed` line includes an item marker. Returns `None` if there is no
456     /// marker. Returns the length of the marker (in bytes) if one is present. Note that the length
457     /// includes the whitespace that follows the marker, for example the marker in `"* list item"`
458     /// has the length of 2.
459     ///
460     /// This function recognizes item markers that correspond to CommonMark's
461     /// ["bullet list marker"](https://spec.commonmark.org/0.30/#bullet-list-marker),
462     /// ["block quote marker"](https://spec.commonmark.org/0.30/#block-quote-marker), and/or
463     /// ["ordered list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker).
464     ///
465     /// Compared to CommonMark specification, the number of digits that are allowed in an ["ordered
466     /// list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker) is more limited (to at
467     /// most 2 digits). Limiting the length of the marker helps reduce the risk of recognizing
468     /// arbitrary numbers as markers. See also
469     /// <https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990> which gives the
470     /// following example where a number (i.e. "1868") doesn't signify an ordered list:
471     /// ```md
472     /// The Captain died in
473     /// 1868. He wes buried in...
474     /// ```
get_marker_length(trimmed: &str) -> Option<usize>475     fn get_marker_length(trimmed: &str) -> Option<usize> {
476         // https://spec.commonmark.org/0.30/#bullet-list-marker or
477         // https://spec.commonmark.org/0.30/#block-quote-marker
478         let itemized_start = ["* ", "- ", "> ", "+ "];
479         if itemized_start.iter().any(|s| trimmed.starts_with(s)) {
480             return Some(2); // All items in `itemized_start` have length 2.
481         }
482 
483         // https://spec.commonmark.org/0.30/#ordered-list-marker, where at most 2 digits are
484         // allowed.
485         for suffix in [". ", ") "] {
486             if let Some((prefix, _)) = trimmed.split_once(suffix) {
487                 if prefix.len() <= 2 && prefix.chars().all(|c| char::is_ascii_digit(&c)) {
488                     return Some(prefix.len() + suffix.len());
489                 }
490             }
491         }
492 
493         None // No markers found.
494     }
495 
496     /// Creates a new `ItemizedBlock` described with the given `line`.
497     /// Returns `None` if `line` doesn't start an item.
new(line: &str) -> Option<ItemizedBlock>498     fn new(line: &str) -> Option<ItemizedBlock> {
499         let marker_length = ItemizedBlock::get_marker_length(line.trim_start())?;
500         let space_to_marker = line.chars().take_while(|c| c.is_whitespace()).count();
501         let mut indent = space_to_marker + marker_length;
502         let mut line_start = " ".repeat(indent);
503 
504         // Markdown blockquote start with a "> "
505         if line.trim_start().starts_with(">") {
506             // remove the original +2 indent because there might be multiple nested block quotes
507             // and it's easier to reason about the final indent by just taking the length
508             // of the new line_start. We update the indent because it effects the max width
509             // of each formatted line.
510             line_start = itemized_block_quote_start(line, line_start, 2);
511             indent = line_start.len();
512         }
513         Some(ItemizedBlock {
514             lines: vec![line[indent..].to_string()],
515             indent,
516             opener: line[..indent].to_string(),
517             line_start,
518         })
519     }
520 
521     /// Returns a `StringFormat` used for formatting the content of an item.
create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a>522     fn create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a> {
523         StringFormat {
524             opener: "",
525             closer: "",
526             line_start: "",
527             line_end: "",
528             shape: Shape::legacy(fmt.shape.width.saturating_sub(self.indent), Indent::empty()),
529             trim_end: true,
530             config: fmt.config,
531         }
532     }
533 
534     /// Returns `true` if the line is part of the current itemized block.
535     /// If it is, then it is added to the internal lines list.
add_line(&mut self, line: &str) -> bool536     fn add_line(&mut self, line: &str) -> bool {
537         if ItemizedBlock::get_marker_length(line.trim_start()).is_none()
538             && self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
539         {
540             self.lines.push(line.to_string());
541             return true;
542         }
543         false
544     }
545 
546     /// Returns the block as a string, with each line trimmed at the start.
trimmed_block_as_string(&self) -> String547     fn trimmed_block_as_string(&self) -> String {
548         self.lines
549             .iter()
550             .map(|line| format!("{} ", line.trim_start()))
551             .collect::<String>()
552     }
553 
554     /// Returns the block as a string under its original form.
original_block_as_string(&self) -> String555     fn original_block_as_string(&self) -> String {
556         self.lines.join("\n")
557     }
558 }
559 
/// Builds the `line_start` to use when formatting markdown block quotes.
///
/// Up to `remove_indent` trailing characters are dropped from the incoming `line_start`
/// (typically indentation whitespace), then one `"> "` is appended for every `>` found in the
/// leading non-alphanumeric part of `line`.
fn itemized_block_quote_start(line: &str, mut line_start: String, remove_indent: usize) -> String {
    // Nesting depth: how many `>` appear before the first alphanumeric character.
    let quote_level = line
        .chars()
        .take_while(|c| !c.is_alphanumeric())
        .filter(|&c| c == '>')
        .count();

    // Drop trailing characters one at a time; stop early once the string is empty.
    let mut still_to_remove = remove_indent;
    while still_to_remove > 0 && line_start.pop().is_some() {
        still_to_remove -= 1;
    }

    line_start.push_str(&"> ".repeat(quote_level));
    line_start
}
578 
/// State carried while a single comment is rewritten line by line.
struct CommentRewrite<'a> {
    /// Output buffer; starts out holding the comment opener.
    result: String,
    /// Lines collected while inside a code block, each terminated by `\n`.
    code_block_buffer: String,
    // NOTE(review): only ever reset to `false` in the code visible here; presumably tracks
    // whether the previously handled line was wrapped over several lines - confirm.
    is_prev_line_multi_line: bool,
    /// `Some` while inside a code block opened by a line starting with three backticks.
    code_block_attr: Option<CodeBlockAttribute>,
    /// The itemized block currently being collected, if any.
    item_block: Option<ItemizedBlock>,
    /// `indent_str` followed by `line_start`; written between consecutive comment lines.
    comment_line_separator: String,
    /// The shape's indentation rendered with a leading newline.
    indent_str: String,
    /// Shape width minus the opener and closer lengths (`1` if that would underflow).
    max_width: usize,
    /// Indentation of the shape the comment is formatted into.
    fmt_indent: Indent,
    /// String format used for the comment text.
    fmt: StringFormat<'a>,

    /// Opener of the comment style in use.
    opener: String,
    /// Closer of the comment style in use.
    closer: String,
    /// Per-line prefix of the comment style in use.
    line_start: String,
    /// The comment style in use.
    style: CommentStyle<'a>,
}
596 
597 impl<'a> CommentRewrite<'a> {
new( orig: &'a str, block_style: bool, shape: Shape, config: &'a Config, ) -> CommentRewrite<'a>598     fn new(
599         orig: &'a str,
600         block_style: bool,
601         shape: Shape,
602         config: &'a Config,
603     ) -> CommentRewrite<'a> {
604         let ((opener, closer, line_start), style) = if block_style {
605             (
606                 CommentStyle::SingleBullet.to_str_tuplet(),
607                 CommentStyle::SingleBullet,
608             )
609         } else {
610             let style = comment_style(orig, config.normalize_comments());
611             (style.to_str_tuplet(), style)
612         };
613 
614         let max_width = shape
615             .width
616             .checked_sub(closer.len() + opener.len())
617             .unwrap_or(1);
618         let indent_str = shape.indent.to_string_with_newline(config).to_string();
619 
620         let mut cr = CommentRewrite {
621             result: String::with_capacity(orig.len() * 2),
622             code_block_buffer: String::with_capacity(128),
623             is_prev_line_multi_line: false,
624             code_block_attr: None,
625             item_block: None,
626             comment_line_separator: format!("{}{}", indent_str, line_start),
627             max_width,
628             indent_str,
629             fmt_indent: shape.indent,
630 
631             fmt: StringFormat {
632                 opener: "",
633                 closer: "",
634                 line_start,
635                 line_end: "",
636                 shape: Shape::legacy(max_width, shape.indent),
637                 trim_end: true,
638                 config,
639             },
640 
641             opener: opener.to_owned(),
642             closer: closer.to_owned(),
643             line_start: line_start.to_owned(),
644             style,
645         };
646         cr.result.push_str(opener);
647         cr
648     }
649 
join_block(s: &str, sep: &str) -> String650     fn join_block(s: &str, sep: &str) -> String {
651         let mut result = String::with_capacity(s.len() + 128);
652         let mut iter = s.lines().peekable();
653         while let Some(line) = iter.next() {
654             result.push_str(line);
655             result.push_str(match iter.peek() {
656                 Some(next_line) if next_line.is_empty() => sep.trim_end(),
657                 Some(..) => sep,
658                 None => "",
659             });
660         }
661         result
662     }
663 
    /// Checks whether any characters were written to the result buffer after the start of the
    /// comment. When [`CommentRewrite::new()`] is called, the result buffer is initialized with
    /// the opening characters for the comment.
    fn buffer_contains_comment(&self) -> bool {
        // if self.result.len() < self.opener.len() then an empty comment is in the buffer
        // if self.result.len() > self.opener.len() then a non empty comment is in the buffer
        self.result.len() != self.opener.len()
    }
672 
finish(mut self) -> String673     fn finish(mut self) -> String {
674         if !self.code_block_buffer.is_empty() {
675             // There is a code block that is not properly enclosed by backticks.
676             // We will leave them untouched.
677             self.result.push_str(&self.comment_line_separator);
678             self.result.push_str(&Self::join_block(
679                 &trim_custom_comment_prefix(&self.code_block_buffer),
680                 &self.comment_line_separator,
681             ));
682         }
683 
684         if let Some(ref ib) = self.item_block {
685             // the last few lines are part of an itemized block
686             self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
687             let item_fmt = ib.create_string_format(&self.fmt);
688 
689             // only push a comment_line_separator for ItemizedBlocks if the comment is not empty
690             if self.buffer_contains_comment() {
691                 self.result.push_str(&self.comment_line_separator);
692             }
693 
694             self.result.push_str(&ib.opener);
695             match rewrite_string(
696                 &ib.trimmed_block_as_string(),
697                 &item_fmt,
698                 self.max_width.saturating_sub(ib.indent),
699             ) {
700                 Some(s) => self.result.push_str(&Self::join_block(
701                     &s,
702                     &format!("{}{}", self.comment_line_separator, ib.line_start),
703                 )),
704                 None => self.result.push_str(&Self::join_block(
705                     &ib.original_block_as_string(),
706                     &self.comment_line_separator,
707                 )),
708             };
709         }
710 
711         self.result.push_str(&self.closer);
712         if self.result.ends_with(&self.opener) && self.opener.ends_with(' ') {
713             // Trailing space.
714             self.result.pop();
715         }
716 
717         self.result
718     }
719 
handle_line( &mut self, orig: &'a str, i: usize, line: &'a str, has_leading_whitespace: bool, is_doc_comment: bool, ) -> bool720     fn handle_line(
721         &mut self,
722         orig: &'a str,
723         i: usize,
724         line: &'a str,
725         has_leading_whitespace: bool,
726         is_doc_comment: bool,
727     ) -> bool {
728         let num_newlines = count_newlines(orig);
729         let is_last = i == num_newlines;
730         let needs_new_comment_line = if self.style.is_block_comment() {
731             num_newlines > 0 || self.buffer_contains_comment()
732         } else {
733             self.buffer_contains_comment()
734         };
735 
736         if let Some(ref mut ib) = self.item_block {
737             if ib.add_line(line) {
738                 return false;
739             }
740             self.is_prev_line_multi_line = false;
741             self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
742             let item_fmt = ib.create_string_format(&self.fmt);
743 
744             // only push a comment_line_separator if we need to start a new comment line
745             if needs_new_comment_line {
746                 self.result.push_str(&self.comment_line_separator);
747             }
748 
749             self.result.push_str(&ib.opener);
750             match rewrite_string(
751                 &ib.trimmed_block_as_string(),
752                 &item_fmt,
753                 self.max_width.saturating_sub(ib.indent),
754             ) {
755                 Some(s) => self.result.push_str(&Self::join_block(
756                     &s,
757                     &format!("{}{}", self.comment_line_separator, ib.line_start),
758                 )),
759                 None => self.result.push_str(&Self::join_block(
760                     &ib.original_block_as_string(),
761                     &self.comment_line_separator,
762                 )),
763             };
764         } else if self.code_block_attr.is_some() {
765             if line.starts_with("```") {
766                 let code_block = match self.code_block_attr.as_ref().unwrap() {
767                     CodeBlockAttribute::Rust
768                         if self.fmt.config.format_code_in_doc_comments()
769                             && !self.code_block_buffer.trim().is_empty() =>
770                     {
771                         let mut config = self.fmt.config.clone();
772                         config.set().wrap_comments(false);
773                         let comment_max_width = config
774                             .doc_comment_code_block_width()
775                             .min(config.max_width());
776                         config.set().max_width(comment_max_width);
777                         if let Some(s) =
778                             crate::format_code_block(&self.code_block_buffer, &config, false)
779                         {
780                             trim_custom_comment_prefix(&s.snippet)
781                         } else {
782                             trim_custom_comment_prefix(&self.code_block_buffer)
783                         }
784                     }
785                     _ => trim_custom_comment_prefix(&self.code_block_buffer),
786                 };
787                 if !code_block.is_empty() {
788                     self.result.push_str(&self.comment_line_separator);
789                     self.result
790                         .push_str(&Self::join_block(&code_block, &self.comment_line_separator));
791                 }
792                 self.code_block_buffer.clear();
793                 self.result.push_str(&self.comment_line_separator);
794                 self.result.push_str(line);
795                 self.code_block_attr = None;
796             } else {
797                 self.code_block_buffer
798                     .push_str(&hide_sharp_behind_comment(line));
799                 self.code_block_buffer.push('\n');
800             }
801             return false;
802         }
803 
804         self.code_block_attr = None;
805         self.item_block = None;
806         if let Some(stripped) = line.strip_prefix("```") {
807             self.code_block_attr = Some(CodeBlockAttribute::new(stripped))
808         } else if self.fmt.config.wrap_comments() {
809             if let Some(ib) = ItemizedBlock::new(line) {
810                 self.item_block = Some(ib);
811                 return false;
812             }
813         }
814 
815         if self.result == self.opener {
816             let force_leading_whitespace = &self.opener == "/* " && count_newlines(orig) == 0;
817             if !has_leading_whitespace && !force_leading_whitespace && self.result.ends_with(' ') {
818                 self.result.pop();
819             }
820             if line.is_empty() {
821                 return false;
822             }
823         } else if self.is_prev_line_multi_line && !line.is_empty() {
824             self.result.push(' ')
825         } else if is_last && line.is_empty() {
826             // trailing blank lines are unwanted
827             if !self.closer.is_empty() {
828                 self.result.push_str(&self.indent_str);
829             }
830             return true;
831         } else {
832             self.result.push_str(&self.comment_line_separator);
833             if !has_leading_whitespace && self.result.ends_with(' ') {
834                 self.result.pop();
835             }
836         }
837 
838         let is_markdown_header_doc_comment = is_doc_comment && line.starts_with("#");
839 
840         // We only want to wrap the comment if:
841         // 1) wrap_comments = true is configured
842         // 2) The comment is not the start of a markdown header doc comment
843         // 3) The comment width exceeds the shape's width
844         // 4) No URLS were found in the comment
845         // If this changes, the documentation in ../Configurations.md#wrap_comments
846         // should be changed accordingly.
847         let should_wrap_comment = self.fmt.config.wrap_comments()
848             && !is_markdown_header_doc_comment
849             && unicode_str_width(line) > self.fmt.shape.width
850             && !has_url(line)
851             && !is_table_item(line);
852 
853         if should_wrap_comment {
854             match rewrite_string(line, &self.fmt, self.max_width) {
855                 Some(ref s) => {
856                     self.is_prev_line_multi_line = s.contains('\n');
857                     self.result.push_str(s);
858                 }
859                 None if self.is_prev_line_multi_line => {
860                     // We failed to put the current `line` next to the previous `line`.
861                     // Remove the trailing space, then start rewrite on the next line.
862                     self.result.pop();
863                     self.result.push_str(&self.comment_line_separator);
864                     self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
865                     match rewrite_string(line, &self.fmt, self.max_width) {
866                         Some(ref s) => {
867                             self.is_prev_line_multi_line = s.contains('\n');
868                             self.result.push_str(s);
869                         }
870                         None => {
871                             self.is_prev_line_multi_line = false;
872                             self.result.push_str(line);
873                         }
874                     }
875                 }
876                 None => {
877                     self.is_prev_line_multi_line = false;
878                     self.result.push_str(line);
879                 }
880             }
881 
882             self.fmt.shape = if self.is_prev_line_multi_line {
883                 // 1 = " "
884                 let offset = 1 + last_line_width(&self.result) - self.line_start.len();
885                 Shape {
886                     width: self.max_width.saturating_sub(offset),
887                     indent: self.fmt_indent,
888                     offset: self.fmt.shape.offset + offset,
889                 }
890             } else {
891                 Shape::legacy(self.max_width, self.fmt_indent)
892             };
893         } else {
894             if line.is_empty() && self.result.ends_with(' ') && !is_last {
895                 // Remove space if this is an empty comment or a doc comment.
896                 self.result.pop();
897             }
898             self.result.push_str(line);
899             self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
900             self.is_prev_line_multi_line = false;
901         }
902 
903         false
904     }
905 }
906 
rewrite_comment_inner( orig: &str, block_style: bool, style: CommentStyle<'_>, shape: Shape, config: &Config, is_doc_comment: bool, ) -> Option<String>907 fn rewrite_comment_inner(
908     orig: &str,
909     block_style: bool,
910     style: CommentStyle<'_>,
911     shape: Shape,
912     config: &Config,
913     is_doc_comment: bool,
914 ) -> Option<String> {
915     let mut rewriter = CommentRewrite::new(orig, block_style, shape, config);
916 
917     let line_breaks = count_newlines(orig.trim_end());
918     let lines = orig
919         .lines()
920         .enumerate()
921         .map(|(i, mut line)| {
922             line = trim_end_unless_two_whitespaces(line.trim_start(), is_doc_comment);
923             // Drop old closer.
924             if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
925                 line = line[..(line.len() - 2)].trim_end();
926             }
927 
928             line
929         })
930         .map(|s| left_trim_comment_line(s, &style))
931         .map(|(line, has_leading_whitespace)| {
932             if orig.starts_with("/*") && line_breaks == 0 {
933                 (
934                     line.trim_start(),
935                     has_leading_whitespace || config.normalize_comments(),
936                 )
937             } else {
938                 (line, has_leading_whitespace || config.normalize_comments())
939             }
940         });
941 
942     for (i, (line, has_leading_whitespace)) in lines.enumerate() {
943         if rewriter.handle_line(orig, i, line, has_leading_whitespace, is_doc_comment) {
944             break;
945         }
946     }
947 
948     Some(rewriter.finish())
949 }
950 
/// Marker that `hide_sharp_behind_comment` puts in front of lines whose trimmed text is `#`
/// or starts with `# `, and that `trim_custom_comment_prefix` removes again.
const RUSTFMT_CUSTOM_COMMENT_PREFIX: &str = "//#### ";
952 
hide_sharp_behind_comment(s: &str) -> Cow<'_, str>953 fn hide_sharp_behind_comment(s: &str) -> Cow<'_, str> {
954     let s_trimmed = s.trim();
955     if s_trimmed.starts_with("# ") || s_trimmed == "#" {
956         Cow::from(format!("{}{}", RUSTFMT_CUSTOM_COMMENT_PREFIX, s))
957     } else {
958         Cow::from(s)
959     }
960 }
961 
trim_custom_comment_prefix(s: &str) -> String962 fn trim_custom_comment_prefix(s: &str) -> String {
963     s.lines()
964         .map(|line| {
965             let left_trimmed = line.trim_start();
966             if left_trimmed.starts_with(RUSTFMT_CUSTOM_COMMENT_PREFIX) {
967                 left_trimmed.trim_start_matches(RUSTFMT_CUSTOM_COMMENT_PREFIX)
968             } else {
969                 line
970             }
971         })
972         .collect::<Vec<_>>()
973         .join("\n")
974 }
975 
976 /// Returns `true` if the given string MAY include URLs or alike.
has_url(s: &str) -> bool977 fn has_url(s: &str) -> bool {
978     // This function may return false positive, but should get its job done in most cases.
979     s.contains("https://")
980         || s.contains("http://")
981         || s.contains("ftp://")
982         || s.contains("file://")
983         || REFERENCE_LINK_URL.is_match(s)
984 }
985 
/// Returns true if the given string may be part of a Markdown table.
///
/// After leading whitespace is ignored, the line must start with `|` and contain at least
/// one more `|` later on. This function may return false positive, but should get its job
/// done in most cases (i.e. markdown tables with two column delimiters).
fn is_table_item(s: &str) -> bool {
    let s = s.trim_start();
    // `rfind` returning 0 means the leading `|` is the only delimiter on the line.
    s.starts_with('|') && matches!(s.rfind('|'), Some(last) if last > 0)
}
997 
998 /// Given the span, rewrite the missing comment inside it if available.
999 /// Note that the given span must only include comments (or leading/trailing whitespaces).
rewrite_missing_comment( span: Span, shape: Shape, context: &RewriteContext<'_>, ) -> Option<String>1000 pub(crate) fn rewrite_missing_comment(
1001     span: Span,
1002     shape: Shape,
1003     context: &RewriteContext<'_>,
1004 ) -> Option<String> {
1005     let missing_snippet = context.snippet(span);
1006     let trimmed_snippet = missing_snippet.trim();
1007     // check the span starts with a comment
1008     let pos = trimmed_snippet.find('/');
1009     if !trimmed_snippet.is_empty() && pos.is_some() {
1010         rewrite_comment(trimmed_snippet, false, shape, context.config)
1011     } else {
1012         Some(String::new())
1013     }
1014 }
1015 
1016 /// Recover the missing comments in the specified span, if available.
1017 /// The layout of the comments will be preserved as long as it does not break the code
1018 /// and its total width does not exceed the max width.
recover_missing_comment_in_span( span: Span, shape: Shape, context: &RewriteContext<'_>, used_width: usize, ) -> Option<String>1019 pub(crate) fn recover_missing_comment_in_span(
1020     span: Span,
1021     shape: Shape,
1022     context: &RewriteContext<'_>,
1023     used_width: usize,
1024 ) -> Option<String> {
1025     let missing_comment = rewrite_missing_comment(span, shape, context)?;
1026     if missing_comment.is_empty() {
1027         Some(String::new())
1028     } else {
1029         let missing_snippet = context.snippet(span);
1030         let pos = missing_snippet.find('/')?;
1031         // 1 = ` `
1032         let total_width = missing_comment.len() + used_width + 1;
1033         let force_new_line_before_comment =
1034             missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
1035         let sep = if force_new_line_before_comment {
1036             shape.indent.to_string_with_newline(context.config)
1037         } else {
1038             Cow::from(" ")
1039         };
1040         Some(format!("{}{}", sep, missing_comment))
1041     }
1042 }
1043 
/// Trims trailing whitespace, except in doc comments whose line ends with two spaces.
///
/// In that one case `s` is returned untouched, including any further whitespace in front
/// of the two spaces.
fn trim_end_unless_two_whitespaces(s: &str, is_doc_comment: bool) -> &str {
    let keep_trailing_spaces = is_doc_comment && s.ends_with("  ");
    if !keep_trailing_spaces {
        return s.trim_end();
    }
    s
}
1052 
1053 /// Trims whitespace and aligns to indent, but otherwise does not change comments.
light_rewrite_comment( orig: &str, offset: Indent, config: &Config, is_doc_comment: bool, ) -> String1054 fn light_rewrite_comment(
1055     orig: &str,
1056     offset: Indent,
1057     config: &Config,
1058     is_doc_comment: bool,
1059 ) -> String {
1060     let lines: Vec<&str> = orig
1061         .lines()
1062         .map(|l| {
1063             // This is basically just l.trim(), but in the case that a line starts
1064             // with `*` we want to leave one space before it, so it aligns with the
1065             // `*` in `/*`.
1066             let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
1067             let left_trimmed = if let Some(fnw) = first_non_whitespace {
1068                 if l.as_bytes()[fnw] == b'*' && fnw > 0 {
1069                     &l[fnw - 1..]
1070                 } else {
1071                     &l[fnw..]
1072                 }
1073             } else {
1074                 ""
1075             };
1076             // Preserve markdown's double-space line break syntax in doc comment.
1077             trim_end_unless_two_whitespaces(left_trimmed, is_doc_comment)
1078         })
1079         .collect();
1080     lines.join(&format!("\n{}", offset.to_string(config)))
1081 }
1082 
1083 /// Trims comment characters and possibly a single space from the left of a string.
1084 /// Does not trim all whitespace. If a single space is trimmed from the left of the string,
1085 /// this function returns true.
left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool)1086 fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool) {
1087     if line.starts_with("//! ")
1088         || line.starts_with("/// ")
1089         || line.starts_with("/*! ")
1090         || line.starts_with("/** ")
1091     {
1092         (&line[4..], true)
1093     } else if let CommentStyle::Custom(opener) = *style {
1094         if let Some(stripped) = line.strip_prefix(opener) {
1095             (stripped, true)
1096         } else {
1097             (&line[opener.trim_end().len()..], false)
1098         }
1099     } else if line.starts_with("/* ")
1100         || line.starts_with("// ")
1101         || line.starts_with("//!")
1102         || line.starts_with("///")
1103         || line.starts_with("** ")
1104         || line.starts_with("/*!")
1105         || (line.starts_with("/**") && !line.starts_with("/**/"))
1106     {
1107         (&line[3..], line.chars().nth(2).unwrap() == ' ')
1108     } else if line.starts_with("/*")
1109         || line.starts_with("* ")
1110         || line.starts_with("//")
1111         || line.starts_with("**")
1112     {
1113         (&line[2..], line.chars().nth(1).unwrap() == ' ')
1114     } else if let Some(stripped) = line.strip_prefix('*') {
1115         (stripped, false)
1116     } else {
1117         (line, line.starts_with(' '))
1118     }
1119 }
1120 
/// Substring search that skips over commented-out text.
pub(crate) trait FindUncommented {
    /// Returns the byte index of the first occurrence of `pat` that lies outside of
    /// comments, or `None` if there is none.
    fn find_uncommented(&self, pat: &str) -> Option<usize>;
    /// Returns the byte index of the last occurrence of `pat` that lies outside of
    /// comments, or `None` if there is none.
    fn find_last_uncommented(&self, pat: &str) -> Option<usize>;
}
1125 
1126 impl FindUncommented for str {
find_uncommented(&self, pat: &str) -> Option<usize>1127     fn find_uncommented(&self, pat: &str) -> Option<usize> {
1128         let mut needle_iter = pat.chars();
1129         for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
1130             match needle_iter.next() {
1131                 None => {
1132                     return Some(i - pat.len());
1133                 }
1134                 Some(c) => match kind {
1135                     FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
1136                     _ => {
1137                         needle_iter = pat.chars();
1138                     }
1139                 },
1140             }
1141         }
1142 
1143         // Handle case where the pattern is a suffix of the search string
1144         match needle_iter.next() {
1145             Some(_) => None,
1146             None => Some(self.len() - pat.len()),
1147         }
1148     }
1149 
find_last_uncommented(&self, pat: &str) -> Option<usize>1150     fn find_last_uncommented(&self, pat: &str) -> Option<usize> {
1151         if let Some(left) = self.find_uncommented(pat) {
1152             let mut result = left;
1153             // add 1 to use find_last_uncommented for &str after pat
1154             while let Some(next) = self[(result + 1)..].find_last_uncommented(pat) {
1155                 result += next + 1;
1156             }
1157             Some(result)
1158         } else {
1159             None
1160         }
1161     }
1162 }
1163 
1164 // Returns the first byte position after the first comment. The given string
1165 // is expected to be prefixed by a comment, including delimiters.
1166 // Good: `/* /* inner */ outer */ code();`
1167 // Bad:  `code(); // hello\n world!`
find_comment_end(s: &str) -> Option<usize>1168 pub(crate) fn find_comment_end(s: &str) -> Option<usize> {
1169     let mut iter = CharClasses::new(s.char_indices());
1170     for (kind, (i, _c)) in &mut iter {
1171         if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
1172             return Some(i);
1173         }
1174     }
1175 
1176     // Handle case where the comment ends at the end of `s`.
1177     if iter.status == CharClassesStatus::Normal {
1178         Some(s.len())
1179     } else {
1180         None
1181     }
1182 }
1183 
1184 /// Returns `true` if text contains any comment.
contains_comment(text: &str) -> bool1185 pub(crate) fn contains_comment(text: &str) -> bool {
1186     CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
1187 }
1188 
/// Iterator adaptor that tags every item of a character stream with a `FullCodeCharKind`
/// (code, string or comment).
pub(crate) struct CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    /// The wrapped character stream; `MultiPeek` is used to look ahead while classifying.
    base: MultiPeek<T>,
    /// Classifier state left behind by the characters consumed so far.
    status: CharClassesStatus,
}
1197 
/// An item that carries a `char`, possibly together with extra data such as its position.
pub(crate) trait RichChar {
    /// Returns the character carried by this item.
    fn get_char(&self) -> char;
}
1201 
// A plain `char` is its own character.
impl RichChar for char {
    fn get_char(&self) -> char {
        *self
    }
}
1207 
// For `(usize, char)` pairs, such as those produced by `str::char_indices`, the character
// is the second element.
impl RichChar for (usize, char) {
    fn get_char(&self) -> char {
        self.1
    }
}
1213 
/// State of the `CharClasses` classifier between two characters.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    /// Not inside a string, char literal or comment; also the initial state.
    Normal,
    /// Character is within a string
    LitString,
    /// A `\` was just read inside a string; the next character returns to `LitString`.
    LitStringEscape,
    /// Character is within a raw string; the integer is the number of `#` counted in its
    /// prefix.
    LitRawString(u32),
    /// Reading the prefix of a raw string, with the integer counting the `#` seen so far.
    RawStringPrefix(u32),
    /// Reading the `#` that close a raw string, with the integer being the number of `#`
    /// still expected.
    RawStringSuffix(u32),
    /// Character is within a char literal.
    LitChar,
    /// A `\` was just read inside a char literal; the next character returns to `LitChar`.
    LitCharEscape,
    /// Character inside a block comment, with the integer indicating the nesting deepness of the
    /// comment
    BlockComment(u32),
    /// Character inside a block-commented string, with the integer indicating the nesting deepness
    /// of the comment
    StringInBlockComment(u32),
    /// Status when the '/' has been consumed, but not yet the '*', deepness is
    /// the new deepness (after the comment opening).
    BlockCommentOpening(u32),
    /// Status when the '*' has been consumed, but not yet the '/', deepness is
    /// the new deepness (after the comment closing).
    BlockCommentClosing(u32),
    /// Character is within a line comment
    LineComment,
}
1241 
/// Distinguish between functional part of code and comments
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum CodeCharKind {
    /// Anything that is not part of a comment.
    Normal,
    /// Part of a comment.
    Comment,
}
1248 
/// Distinguish between functional part of code and comments,
/// describing opening and closing of comments for ease when chunking
/// code from tagged characters
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum FullCodeCharKind {
    /// A character that is not classified as part of a comment or a string.
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
    /// Start of a multiline string inside a comment
    StartStringCommented,
    /// End of a multiline string inside a comment
    EndStringCommented,
    /// Inside a commented string
    InStringCommented,
    /// Start of a multiline string
    StartString,
    /// End of a multiline string
    EndString,
    /// Inside a string.
    InString,
}
1275 
1276 impl FullCodeCharKind {
is_comment(self) -> bool1277     pub(crate) fn is_comment(self) -> bool {
1278         match self {
1279             FullCodeCharKind::StartComment
1280             | FullCodeCharKind::InComment
1281             | FullCodeCharKind::EndComment
1282             | FullCodeCharKind::StartStringCommented
1283             | FullCodeCharKind::InStringCommented
1284             | FullCodeCharKind::EndStringCommented => true,
1285             _ => false,
1286         }
1287     }
1288 
1289     /// Returns true if the character is inside a comment
inside_comment(self) -> bool1290     pub(crate) fn inside_comment(self) -> bool {
1291         match self {
1292             FullCodeCharKind::InComment
1293             | FullCodeCharKind::StartStringCommented
1294             | FullCodeCharKind::InStringCommented
1295             | FullCodeCharKind::EndStringCommented => true,
1296             _ => false,
1297         }
1298     }
1299 
is_string(self) -> bool1300     pub(crate) fn is_string(self) -> bool {
1301         self == FullCodeCharKind::InString || self == FullCodeCharKind::StartString
1302     }
1303 
1304     /// Returns true if the character is within a commented string
is_commented_string(self) -> bool1305     pub(crate) fn is_commented_string(self) -> bool {
1306         self == FullCodeCharKind::InStringCommented
1307             || self == FullCodeCharKind::StartStringCommented
1308     }
1309 
to_codecharkind(self) -> CodeCharKind1310     fn to_codecharkind(self) -> CodeCharKind {
1311         if self.is_comment() {
1312             CodeCharKind::Comment
1313         } else {
1314             CodeCharKind::Normal
1315         }
1316     }
1317 }
1318 
1319 impl<T> CharClasses<T>
1320 where
1321     T: Iterator,
1322     T::Item: RichChar,
1323 {
new(base: T) -> CharClasses<T>1324     pub(crate) fn new(base: T) -> CharClasses<T> {
1325         CharClasses {
1326             base: multipeek(base),
1327             status: CharClassesStatus::Normal,
1328         }
1329     }
1330 }
1331 
is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool where T: Iterator, T::Item: RichChar,1332 fn is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool
1333 where
1334     T: Iterator,
1335     T::Item: RichChar,
1336 {
1337     for _ in 0..count {
1338         match iter.peek() {
1339             Some(c) if c.get_char() == '#' => continue,
1340             _ => return false,
1341         }
1342     }
1343     true
1344 }
1345 
/// Classifies the wrapped stream one character at a time.
///
/// `next` takes one item from `base`, decides its `FullCodeCharKind` from the current
/// `status` (peeking ahead where needed) and stores the status for the following
/// character. The iterator ends when `base` ends.
impl<T> Iterator for CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    type Item = (FullCodeCharKind, T::Item);

    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = self.base.next()?;
        let chr = item.get_char();
        // Kind reported for this character unless a branch below overrides it or returns
        // early with its own kind.
        let mut char_kind = FullCodeCharKind::Normal;
        self.status = match self.status {
            // Inside a raw string: everything is string content until the closing quote.
            CharClassesStatus::LitRawString(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => {
                        if sharps == 0 {
                            // No `#` to match: this quote ends the raw string and is
                            // itself reported as `Normal`.
                            char_kind = FullCodeCharKind::Normal;
                            CharClassesStatus::Normal
                        } else if is_raw_string_suffix(&mut self.base, sharps) {
                            // The quote is followed by `sharps` `#`: start consuming them.
                            CharClassesStatus::RawStringSuffix(sharps)
                        } else {
                            // A quote that is part of the raw string's content.
                            CharClassesStatus::LitRawString(sharps)
                        }
                    }
                    _ => CharClassesStatus::LitRawString(sharps),
                }
            }
            // Between the `r` and the opening quote: count the `#`.
            CharClassesStatus::RawStringPrefix(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '#' => CharClassesStatus::RawStringPrefix(sharps + 1),
                    '"' => CharClassesStatus::LitRawString(sharps),
                    _ => CharClassesStatus::Normal, // Unreachable.
                }
            }
            // After the closing quote: consume the remaining `#`. The last one keeps the
            // default `Normal` kind, the ones before it are reported as `InString`.
            CharClassesStatus::RawStringSuffix(sharps) => {
                match chr {
                    '#' => {
                        if sharps == 1 {
                            CharClassesStatus::Normal
                        } else {
                            char_kind = FullCodeCharKind::InString;
                            CharClassesStatus::RawStringSuffix(sharps - 1)
                        }
                    }
                    _ => CharClassesStatus::Normal, // Unreachable
                }
            }
            // Inside a regular string, closing quote included.
            CharClassesStatus::LitString => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => CharClassesStatus::Normal,
                    '\\' => CharClassesStatus::LitStringEscape,
                    _ => CharClassesStatus::LitString,
                }
            }
            // The escaped character never ends the string.
            CharClassesStatus::LitStringEscape => {
                char_kind = FullCodeCharKind::InString;
                CharClassesStatus::LitString
            }
            // Char literals are tracked so that their content cannot open a string or a
            // comment, but their characters keep the `Normal` kind.
            CharClassesStatus::LitChar => match chr {
                '\\' => CharClassesStatus::LitCharEscape,
                '\'' => CharClassesStatus::Normal,
                _ => CharClassesStatus::LitChar,
            },
            CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
            CharClassesStatus::Normal => match chr {
                // An `r` directly followed by `#` or `"` is taken as a raw string prefix.
                'r' => match self.base.peek().map(RichChar::get_char) {
                    Some('#') | Some('"') => {
                        char_kind = FullCodeCharKind::InString;
                        CharClassesStatus::RawStringPrefix(0)
                    }
                    _ => CharClassesStatus::Normal,
                },
                '"' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
                '\'' => {
                    // HACK: Work around mut borrow.
                    // A quote followed by a backslash starts an escaped char literal.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\\' => {
                            self.status = CharClassesStatus::LitChar;
                            return Some((char_kind, item));
                        }
                        _ => (),
                    }

                    // NOTE(review): with itertools' `MultiPeek`, a second `peek` without an
                    // intervening `next` presumably looks one item further ahead, so this
                    // tests for the closing quote of a one-character literal (`'x'`) and
                    // otherwise stays in `Normal` - confirm against the itertools docs.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\'' => CharClassesStatus::LitChar,
                        _ => CharClassesStatus::Normal,
                    }
                }
                // `/*` and `//` open a comment; the `/` itself is the `StartComment`.
                '/' => match self.base.peek() {
                    Some(next) if next.get_char() == '*' => {
                        self.status = CharClassesStatus::BlockCommentOpening(1);
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    Some(next) if next.get_char() == '/' => {
                        self.status = CharClassesStatus::LineComment;
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    _ => CharClassesStatus::Normal,
                },
                _ => CharClassesStatus::Normal,
            },
            // Inside a string within a block comment: a quote ends the string, and `*/`
            // closes one comment level even though the string is still open.
            CharClassesStatus::StringInBlockComment(deepness) => {
                char_kind = FullCodeCharKind::InStringCommented;
                if chr == '"' {
                    CharClassesStatus::BlockComment(deepness)
                } else if chr == '*' && self.base.peek().map(RichChar::get_char) == Some('/') {
                    char_kind = FullCodeCharKind::InComment;
                    CharClassesStatus::BlockCommentClosing(deepness - 1)
                } else {
                    CharClassesStatus::StringInBlockComment(deepness)
                }
            }
            // Inside a block comment: watch for `*/` (one level out), `/*` (one level in)
            // and `"` (commented string). Panics if the nesting depth is 0.
            CharClassesStatus::BlockComment(deepness) => {
                assert_ne!(deepness, 0);
                char_kind = FullCodeCharKind::InComment;
                match self.base.peek() {
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
                    }
                    Some(next) if next.get_char() == '*' && chr == '/' => {
                        CharClassesStatus::BlockCommentOpening(deepness + 1)
                    }
                    _ if chr == '"' => CharClassesStatus::StringInBlockComment(deepness),
                    _ => self.status,
                }
            }
            // The `*` of an opening `/*`; panics if the character is anything else.
            CharClassesStatus::BlockCommentOpening(deepness) => {
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
                return Some((FullCodeCharKind::InComment, item));
            }
            // The `/` of a closing `*/`; panics if the character is anything else. Only
            // the `/` that closes the outermost level is reported as `EndComment`.
            CharClassesStatus::BlockCommentClosing(deepness) => {
                assert_eq!(chr, '/');
                if deepness == 0 {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                } else {
                    self.status = CharClassesStatus::BlockComment(deepness);
                    return Some((FullCodeCharKind::InComment, item));
                }
            }
            // A line comment runs up to and including the next `\n`.
            CharClassesStatus::LineComment => match chr {
                '\n' => {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                }
                _ => {
                    self.status = CharClassesStatus::LineComment;
                    return Some((FullCodeCharKind::InComment, item));
                }
            },
        };
        Some((char_kind, item))
    }
}
1507 
1508 /// An iterator over the lines of a string, paired with the char kind at the
1509 /// end of the line.
1510 pub(crate) struct LineClasses<'a> {
1511     base: iter::Peekable<CharClasses<std::str::Chars<'a>>>,
1512     kind: FullCodeCharKind,
1513 }
1514 
1515 impl<'a> LineClasses<'a> {
new(s: &'a str) -> Self1516     pub(crate) fn new(s: &'a str) -> Self {
1517         LineClasses {
1518             base: CharClasses::new(s.chars()).peekable(),
1519             kind: FullCodeCharKind::Normal,
1520         }
1521     }
1522 }
1523 
1524 impl<'a> Iterator for LineClasses<'a> {
1525     type Item = (FullCodeCharKind, String);
1526 
next(&mut self) -> Option<Self::Item>1527     fn next(&mut self) -> Option<Self::Item> {
1528         self.base.peek()?;
1529 
1530         let mut line = String::new();
1531 
1532         let start_kind = match self.base.peek() {
1533             Some((kind, _)) => *kind,
1534             None => unreachable!(),
1535         };
1536 
1537         for (kind, c) in self.base.by_ref() {
1538             // needed to set the kind of the ending character on the last line
1539             self.kind = kind;
1540             if c == '\n' {
1541                 self.kind = match (start_kind, kind) {
1542                     (FullCodeCharKind::Normal, FullCodeCharKind::InString) => {
1543                         FullCodeCharKind::StartString
1544                     }
1545                     (FullCodeCharKind::InString, FullCodeCharKind::Normal) => {
1546                         FullCodeCharKind::EndString
1547                     }
1548                     (FullCodeCharKind::InComment, FullCodeCharKind::InStringCommented) => {
1549                         FullCodeCharKind::StartStringCommented
1550                     }
1551                     (FullCodeCharKind::InStringCommented, FullCodeCharKind::InComment) => {
1552                         FullCodeCharKind::EndStringCommented
1553                     }
1554                     _ => kind,
1555                 };
1556                 break;
1557             }
1558             line.push(c);
1559         }
1560 
1561         // Workaround for CRLF newline.
1562         if line.ends_with('\r') {
1563             line.pop();
1564         }
1565 
1566         Some((self.kind, line))
1567     }
1568 }
1569 
1570 /// Iterator over functional and commented parts of a string. Any part of a string is either
1571 /// functional code, either *one* block comment, either *one* line comment. Whitespace between
1572 /// comments is functional code. Line comments contain their ending newlines.
1573 struct UngroupedCommentCodeSlices<'a> {
1574     slice: &'a str,
1575     iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
1576 }
1577 
1578 impl<'a> UngroupedCommentCodeSlices<'a> {
new(code: &'a str) -> UngroupedCommentCodeSlices<'a>1579     fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
1580         UngroupedCommentCodeSlices {
1581             slice: code,
1582             iter: CharClasses::new(code.char_indices()).peekable(),
1583         }
1584     }
1585 }
1586 
1587 impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
1588     type Item = (CodeCharKind, usize, &'a str);
1589 
next(&mut self) -> Option<Self::Item>1590     fn next(&mut self) -> Option<Self::Item> {
1591         let (kind, (start_idx, _)) = self.iter.next()?;
1592         match kind {
1593             FullCodeCharKind::Normal | FullCodeCharKind::InString => {
1594                 // Consume all the Normal code
1595                 while let Some(&(char_kind, _)) = self.iter.peek() {
1596                     if char_kind.is_comment() {
1597                         break;
1598                     }
1599                     let _ = self.iter.next();
1600                 }
1601             }
1602             FullCodeCharKind::StartComment => {
1603                 // Consume the whole comment
1604                 loop {
1605                     match self.iter.next() {
1606                         Some((kind, ..)) if kind.inside_comment() => continue,
1607                         _ => break,
1608                     }
1609                 }
1610             }
1611             _ => panic!(),
1612         }
1613         let slice = match self.iter.peek() {
1614             Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
1615             None => &self.slice[start_idx..],
1616         };
1617         Some((
1618             if kind.is_comment() {
1619                 CodeCharKind::Comment
1620             } else {
1621                 CodeCharKind::Normal
1622             },
1623             start_idx,
1624             slice,
1625         ))
1626     }
1627 }
1628 
1629 /// Iterator over an alternating sequence of functional and commented parts of
1630 /// a string. The first item is always a, possibly zero length, subslice of
1631 /// functional text. Line style comments contain their ending newlines.
1632 pub(crate) struct CommentCodeSlices<'a> {
1633     slice: &'a str,
1634     last_slice_kind: CodeCharKind,
1635     last_slice_end: usize,
1636 }
1637 
1638 impl<'a> CommentCodeSlices<'a> {
new(slice: &'a str) -> CommentCodeSlices<'a>1639     pub(crate) fn new(slice: &'a str) -> CommentCodeSlices<'a> {
1640         CommentCodeSlices {
1641             slice,
1642             last_slice_kind: CodeCharKind::Comment,
1643             last_slice_end: 0,
1644         }
1645     }
1646 }
1647 
1648 impl<'a> Iterator for CommentCodeSlices<'a> {
1649     type Item = (CodeCharKind, usize, &'a str);
1650 
next(&mut self) -> Option<Self::Item>1651     fn next(&mut self) -> Option<Self::Item> {
1652         if self.last_slice_end == self.slice.len() {
1653             return None;
1654         }
1655 
1656         let mut sub_slice_end = self.last_slice_end;
1657         let mut first_whitespace = None;
1658         let subslice = &self.slice[self.last_slice_end..];
1659         let mut iter = CharClasses::new(subslice.char_indices());
1660 
1661         for (kind, (i, c)) in &mut iter {
1662             let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
1663                 && &subslice[..2] == "//"
1664                 && [' ', '\t'].contains(&c);
1665 
1666             if is_comment_connector && first_whitespace.is_none() {
1667                 first_whitespace = Some(i);
1668             }
1669 
1670             if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
1671                 let last_index = match first_whitespace {
1672                     Some(j) => j,
1673                     None => i,
1674                 };
1675                 sub_slice_end = self.last_slice_end + last_index;
1676                 break;
1677             }
1678 
1679             if !is_comment_connector {
1680                 first_whitespace = None;
1681             }
1682         }
1683 
1684         if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
1685             // This was the last subslice.
1686             sub_slice_end = match first_whitespace {
1687                 Some(i) => self.last_slice_end + i,
1688                 None => self.slice.len(),
1689             };
1690         }
1691 
1692         let kind = match self.last_slice_kind {
1693             CodeCharKind::Comment => CodeCharKind::Normal,
1694             CodeCharKind::Normal => CodeCharKind::Comment,
1695         };
1696         let res = (
1697             kind,
1698             self.last_slice_end,
1699             &self.slice[self.last_slice_end..sub_slice_end],
1700         );
1701         self.last_slice_end = sub_slice_end;
1702         self.last_slice_kind = kind;
1703 
1704         Some(res)
1705     }
1706 }
1707 
1708 /// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
1709 /// (if it fits in the width/offset, else return `None`), else return `new`
recover_comment_removed( new: String, span: Span, context: &RewriteContext<'_>, ) -> Option<String>1710 pub(crate) fn recover_comment_removed(
1711     new: String,
1712     span: Span,
1713     context: &RewriteContext<'_>,
1714 ) -> Option<String> {
1715     let snippet = context.snippet(span);
1716     if snippet != new && changed_comment_content(snippet, &new) {
1717         // We missed some comments. Warn and keep the original text.
1718         if context.config.error_on_unformatted() {
1719             context.report.append(
1720                 context.parse_sess.span_to_filename(span),
1721                 vec![FormattingError::from_span(
1722                     span,
1723                     context.parse_sess,
1724                     ErrorKind::LostComment,
1725                 )],
1726             );
1727         }
1728         Some(snippet.to_owned())
1729     } else {
1730         Some(new)
1731     }
1732 }
1733 
filter_normal_code(code: &str) -> String1734 pub(crate) fn filter_normal_code(code: &str) -> String {
1735     let mut buffer = String::with_capacity(code.len());
1736     LineClasses::new(code).for_each(|(kind, line)| match kind {
1737         FullCodeCharKind::Normal
1738         | FullCodeCharKind::StartString
1739         | FullCodeCharKind::InString
1740         | FullCodeCharKind::EndString => {
1741             buffer.push_str(&line);
1742             buffer.push('\n');
1743         }
1744         _ => (),
1745     });
1746     if !code.ends_with('\n') && buffer.ends_with('\n') {
1747         buffer.pop();
1748     }
1749     buffer
1750 }
1751 
1752 /// Returns `true` if the two strings of code have the same payload of comments.
1753 /// The payload of comments is everything in the string except:
1754 /// - actual code (not comments),
1755 /// - comment start/end marks,
1756 /// - whitespace,
1757 /// - '*' at the beginning of lines in block comments.
changed_comment_content(orig: &str, new: &str) -> bool1758 fn changed_comment_content(orig: &str, new: &str) -> bool {
1759     // Cannot write this as a fn since we cannot return types containing closures.
1760     let code_comment_content = |code| {
1761         let slices = UngroupedCommentCodeSlices::new(code);
1762         slices
1763             .filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
1764             .flat_map(|(_, _, s)| CommentReducer::new(s))
1765     };
1766     let res = code_comment_content(orig).ne(code_comment_content(new));
1767     debug!(
1768         "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
1769         res,
1770         orig,
1771         new,
1772         code_comment_content(orig).collect::<String>(),
1773         code_comment_content(new).collect::<String>()
1774     );
1775     res
1776 }
1777 
/// Iterator over the 'payload' characters of a comment.
///
/// It skips whitespace, comment start/end marks, and a single '*' at the beginning of each
/// line of a block comment after the first one.
/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
/// for example).
struct CommentReducer<'a> {
    /// Whether the comment is a block comment (starts with `/*`).
    is_block: bool,
    /// Whether the next character read is the first one of a line.
    at_start_line: bool,
    /// Characters of the comment without its start/end marks.
    iter: std::str::Chars<'a>,
}

impl<'a> CommentReducer<'a> {
    /// Creates a reducer for `comment`, which must start with a comment opener.
    ///
    /// # Panics
    ///
    /// Panics if `comment` does not start with `//` or `/*`.
    fn new(comment: &'a str) -> CommentReducer<'a> {
        let is_block = comment.starts_with("/*");
        let comment = remove_comment_header(comment);
        CommentReducer {
            is_block,
            // There are no supplementary '*' on the first line.
            at_start_line: false,
            iter: comment.chars(),
        }
    }
}

impl<'a> Iterator for CommentReducer<'a> {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut c = self.iter.next()?;
            if self.is_block && self.at_start_line {
                // Skip the indentation (and any blank lines) in front of the line's content.
                while c.is_whitespace() {
                    c = self.iter.next()?;
                }
                // Ignore leading '*'.
                if c == '*' {
                    c = self.iter.next()?;
                }
                // The start of the line has been dealt with: any further '*' on this line is
                // payload and must not be dropped.
                self.at_start_line = false;
            }
            if c == '\n' {
                self.at_start_line = true;
            }
            if !c.is_whitespace() {
                return Some(c);
            }
        }
    }
}

/// Strips the opening mark (`//`, `///`, `//!`, `/*`, `/**` or `/*!`) from `comment` and, for
/// block comments, the closing `*/` when there is one.
///
/// An unterminated block comment is returned without its opener only, instead of losing its
/// last two bytes.
///
/// # Panics
///
/// Panics if `comment` does not start with `//` or `/*`.
fn remove_comment_header(comment: &str) -> &str {
    if comment.starts_with("///") || comment.starts_with("//!") {
        return &comment[3..];
    }
    if let Some(stripped) = comment.strip_prefix("//") {
        return stripped;
    }

    // `/**/` is an empty plain block comment, not the opener of a doc comment.
    let is_doc_block = (comment.starts_with("/**") && !comment.starts_with("/**/"))
        || comment.starts_with("/*!");
    let body = if is_doc_block {
        &comment[3..]
    } else {
        assert!(
            comment.starts_with("/*"),
            "string '{}' is not a comment",
            comment
        );
        &comment[2..]
    };
    body.strip_suffix("*/").unwrap_or(body)
}
1843 
1844 #[cfg(test)]
1845 mod test {
1846     use super::*;
1847     use crate::shape::{Indent, Shape};
1848 
1849     #[test]
char_classes()1850     fn char_classes() {
1851         let mut iter = CharClasses::new("//\n\n".chars());
1852 
1853         assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
1854         assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
1855         assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
1856         assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
1857         assert_eq!(None, iter.next());
1858     }
1859 
1860     #[test]
comment_code_slices()1861     fn comment_code_slices() {
1862         let input = "code(); /* test */ 1 + 1";
1863         let mut iter = CommentCodeSlices::new(input);
1864 
1865         assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap());
1866         assert_eq!(
1867             (CodeCharKind::Comment, 8, "/* test */"),
1868             iter.next().unwrap()
1869         );
1870         assert_eq!((CodeCharKind::Normal, 18, " 1 + 1"), iter.next().unwrap());
1871         assert_eq!(None, iter.next());
1872     }
1873 
1874     #[test]
comment_code_slices_two()1875     fn comment_code_slices_two() {
1876         let input = "// comment\n    test();";
1877         let mut iter = CommentCodeSlices::new(input);
1878 
1879         assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap());
1880         assert_eq!(
1881             (CodeCharKind::Comment, 0, "// comment\n"),
1882             iter.next().unwrap()
1883         );
1884         assert_eq!(
1885             (CodeCharKind::Normal, 11, "    test();"),
1886             iter.next().unwrap()
1887         );
1888         assert_eq!(None, iter.next());
1889     }
1890 
1891     #[test]
comment_code_slices_three()1892     fn comment_code_slices_three() {
1893         let input = "1 // comment\n    // comment2\n\n";
1894         let mut iter = CommentCodeSlices::new(input);
1895 
1896         assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
1897         assert_eq!(
1898             (CodeCharKind::Comment, 2, "// comment\n    // comment2\n"),
1899             iter.next().unwrap()
1900         );
1901         assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
1902         assert_eq!(None, iter.next());
1903     }
1904 
1905     #[test]
1906     #[rustfmt::skip]
format_doc_comments()1907     fn format_doc_comments() {
1908         let mut wrap_normalize_config: crate::config::Config = Default::default();
1909         wrap_normalize_config.set().wrap_comments(true);
1910         wrap_normalize_config.set().normalize_comments(true);
1911 
1912         let mut wrap_config: crate::config::Config = Default::default();
1913         wrap_config.set().wrap_comments(true);
1914 
1915         let comment = rewrite_comment(" //test",
1916                                       true,
1917                                       Shape::legacy(100, Indent::new(0, 100)),
1918                                       &wrap_normalize_config).unwrap();
1919         assert_eq!("/* test */", comment);
1920 
1921         let comment = rewrite_comment("// comment on a",
1922                                       false,
1923                                       Shape::legacy(10, Indent::empty()),
1924                                       &wrap_normalize_config).unwrap();
1925         assert_eq!("// comment\n// on a", comment);
1926 
1927         let comment = rewrite_comment("//  A multi line comment\n             // between args.",
1928                                       false,
1929                                       Shape::legacy(60, Indent::new(0, 12)),
1930                                       &wrap_normalize_config).unwrap();
1931         assert_eq!("//  A multi line comment\n            // between args.", comment);
1932 
1933         let input = "// comment";
1934         let expected =
1935             "/* comment */";
1936         let comment = rewrite_comment(input,
1937                                       true,
1938                                       Shape::legacy(9, Indent::new(0, 69)),
1939                                       &wrap_normalize_config).unwrap();
1940         assert_eq!(expected, comment);
1941 
1942         let comment = rewrite_comment("/*   trimmed    */",
1943                                       true,
1944                                       Shape::legacy(100, Indent::new(0, 100)),
1945                                       &wrap_normalize_config).unwrap();
1946         assert_eq!("/* trimmed */", comment);
1947 
1948         // Check that different comment style are properly recognised.
1949         let comment = rewrite_comment(r#"/// test1
1950                                          /// test2
1951                                          /*
1952                                           * test3
1953                                           */"#,
1954                                       false,
1955                                       Shape::legacy(100, Indent::new(0, 0)),
1956                                       &wrap_normalize_config).unwrap();
1957         assert_eq!("/// test1\n/// test2\n// test3", comment);
1958 
1959         // Check that the blank line marks the end of a commented paragraph.
1960         let comment = rewrite_comment(r#"// test1
1961 
1962                                          // test2"#,
1963                                       false,
1964                                       Shape::legacy(100, Indent::new(0, 0)),
1965                                       &wrap_normalize_config).unwrap();
1966         assert_eq!("// test1\n\n// test2", comment);
1967 
1968         // Check that the blank line marks the end of a custom-commented paragraph.
1969         let comment = rewrite_comment(r#"//@ test1
1970 
1971                                          //@ test2"#,
1972                                       false,
1973                                       Shape::legacy(100, Indent::new(0, 0)),
1974                                       &wrap_normalize_config).unwrap();
1975         assert_eq!("//@ test1\n\n//@ test2", comment);
1976 
1977         // Check that bare lines are just indented but otherwise left unchanged.
1978         let comment = rewrite_comment(r#"// test1
1979                                          /*
1980                                            a bare line!
1981 
1982                                                 another bare line!
1983                                           */"#,
1984                                       false,
1985                                       Shape::legacy(100, Indent::new(0, 0)),
1986                                       &wrap_config).unwrap();
1987         assert_eq!("// test1\n/*\n a bare line!\n\n      another bare line!\n*/", comment);
1988     }
1989 
1990     // This is probably intended to be a non-test fn, but it is not used.
1991     // We should keep this around unless it helps us test stuff to remove it.
uncommented(text: &str) -> String1992     fn uncommented(text: &str) -> String {
1993         CharClasses::new(text.chars())
1994             .filter_map(|(s, c)| match s {
1995                 FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
1996                 _ => None,
1997             })
1998             .collect()
1999     }
2000 
2001     #[test]
test_uncommented()2002     fn test_uncommented() {
2003         assert_eq!(&uncommented("abc/*...*/"), "abc");
2004         assert_eq!(
2005             &uncommented("// .... /* \n../* /* *** / */ */a/* // */c\n"),
2006             "..ac\n"
2007         );
2008         assert_eq!(&uncommented("abc \" /* */\" qsdf"), "abc \" /* */\" qsdf");
2009     }
2010 
2011     #[test]
test_contains_comment()2012     fn test_contains_comment() {
2013         assert_eq!(contains_comment("abc"), false);
2014         assert_eq!(contains_comment("abc // qsdf"), true);
2015         assert_eq!(contains_comment("abc /* kqsdf"), true);
2016         assert_eq!(contains_comment("abc \" /* */\" qsdf"), false);
2017     }
2018 
2019     #[test]
test_find_uncommented()2020     fn test_find_uncommented() {
2021         fn check(haystack: &str, needle: &str, expected: Option<usize>) {
2022             assert_eq!(expected, haystack.find_uncommented(needle));
2023         }
2024 
2025         check("/*/ */test", "test", Some(6));
2026         check("//test\ntest", "test", Some(7));
2027         check("/* comment only */", "whatever", None);
2028         check(
2029             "/* comment */ some text /* more commentary */ result",
2030             "result",
2031             Some(46),
2032         );
2033         check("sup // sup", "p", Some(2));
2034         check("sup", "x", None);
2035         check(r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9));
2036         check("/*sup yo? \n sup*/ sup", "p", Some(20));
2037         check("hel/*lohello*/lo", "hello", None);
2038         check("acb", "ab", None);
2039         check(",/*A*/ ", ",", Some(0));
2040         check("abc", "abc", Some(0));
2041         check("/* abc */", "abc", None);
2042         check("/**/abc/* */", "abc", Some(4));
2043         check("\"/* abc */\"", "abc", Some(4));
2044         check("\"/* abc", "abc", Some(4));
2045     }
2046 
2047     #[test]
test_filter_normal_code()2048     fn test_filter_normal_code() {
2049         let s = r#"
2050 fn main() {
2051     println!("hello, world");
2052 }
2053 "#;
2054         assert_eq!(s, filter_normal_code(s));
2055         let s_with_comment = r#"
2056 fn main() {
2057     // hello, world
2058     println!("hello, world");
2059 }
2060 "#;
2061         assert_eq!(s, filter_normal_code(s_with_comment));
2062     }
2063 
2064     #[test]
test_itemized_block_first_line_handling()2065     fn test_itemized_block_first_line_handling() {
2066         fn run_test(
2067             test_input: &str,
2068             expected_line: &str,
2069             expected_indent: usize,
2070             expected_opener: &str,
2071             expected_line_start: &str,
2072         ) {
2073             let block = ItemizedBlock::new(test_input).unwrap();
2074             assert_eq!(1, block.lines.len(), "test_input: {:?}", test_input);
2075             assert_eq!(
2076                 expected_line, &block.lines[0],
2077                 "test_input: {:?}",
2078                 test_input
2079             );
2080             assert_eq!(
2081                 expected_indent, block.indent,
2082                 "test_input: {:?}",
2083                 test_input
2084             );
2085             assert_eq!(
2086                 expected_opener, &block.opener,
2087                 "test_input: {:?}",
2088                 test_input
2089             );
2090             assert_eq!(
2091                 expected_line_start, &block.line_start,
2092                 "test_input: {:?}",
2093                 test_input
2094             );
2095         }
2096 
2097         run_test("- foo", "foo", 2, "- ", "  ");
2098         run_test("* foo", "foo", 2, "* ", "  ");
2099         run_test("> foo", "foo", 2, "> ", "> ");
2100 
2101         run_test("1. foo", "foo", 3, "1. ", "   ");
2102         run_test("12. foo", "foo", 4, "12. ", "    ");
2103         run_test("1) foo", "foo", 3, "1) ", "   ");
2104         run_test("12) foo", "foo", 4, "12) ", "    ");
2105 
2106         run_test("    - foo", "foo", 6, "    - ", "      ");
2107 
2108         // https://spec.commonmark.org/0.30 says: "A start number may begin with 0s":
2109         run_test("0. foo", "foo", 3, "0. ", "   ");
2110         run_test("01. foo", "foo", 4, "01. ", "    ");
2111     }
2112 
2113     #[test]
test_itemized_block_nonobvious_markers_are_rejected()2114     fn test_itemized_block_nonobvious_markers_are_rejected() {
2115         let test_inputs = vec![
2116             // Non-numeric item markers (e.g. `a.` or `iv.`) are not allowed by
2117             // https://spec.commonmark.org/0.30/#ordered-list-marker. We also note that allowing
2118             // them would risk misidentifying regular words as item markers. See also the
2119             // discussion in https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
2120             "word.  rest of the paragraph.",
2121             "a.  maybe this is a list item?  maybe not?",
2122             "iv.  maybe this is a list item?  maybe not?",
2123             // Numbers with 3 or more digits are not recognized as item markers, to avoid
2124             // formatting the following example as a list:
2125             //
2126             // ```
2127             // The Captain died in
2128             // 1868. He was buried in...
2129             // ```
2130             "123.  only 2-digit numbers are recognized as item markers.",
2131             // Parens:
2132             "123)  giving some coverage to parens as well.",
2133             "a)  giving some coverage to parens as well.",
2134             // https://spec.commonmark.org/0.30 says that "at least one space or tab is needed
2135             // between the list marker and any following content":
2136             "1.Not a list item.",
2137             "1.2.3. Not a list item.",
2138             "1)Not a list item.",
2139             "-Not a list item.",
2140             "+Not a list item.",
2141             "+1 not a list item.",
2142             // https://spec.commonmark.org/0.30 says: "A start number may not be negative":
2143             "-1. Not a list item.",
2144             "-1 Not a list item.",
2145         ];
2146         for line in test_inputs.iter() {
2147             let maybe_block = ItemizedBlock::new(line);
2148             assert!(
2149                 maybe_block.is_none(),
2150                 "The following line shouldn't be classified as a list item: {}",
2151                 line
2152             );
2153         }
2154     }
2155 }
2156