1 // Formatting and tools for comments.
2
3 use std::{self, borrow::Cow, iter};
4
5 use itertools::{multipeek, MultiPeek};
6 use lazy_static::lazy_static;
7 use regex::Regex;
8 use rustc_span::Span;
9
10 use crate::config::Config;
11 use crate::rewrite::RewriteContext;
12 use crate::shape::{Indent, Shape};
13 use crate::string::{rewrite_string, StringFormat};
14 use crate::utils::{
15 count_newlines, first_line_width, last_line_width, trim_left_preserve_layout,
16 trimmed_last_line_width, unicode_str_width,
17 };
18 use crate::{ErrorKind, FormattingError};
19
lazy_static! {
    /// A regex matching reference doc links.
    ///
    /// ```markdown
    /// /// An [example].
    /// ///
    /// /// [example]: this::is::a::link
    /// ```
    ///
    /// The pattern is anchored at the start of the input and matches `[`, one or more
    /// characters, `]`, at most one whitespace character, and then `:`. It is compiled once,
    /// on first use; the `unwrap` can only fail if the literal pattern itself is invalid.
    static ref REFERENCE_LINK_URL: Regex = Regex::new(r"^\[.+\]\s?:").unwrap();
}
30
/// Returns `true` if `comment` starts with `//` immediately followed by a character that is
/// neither alphanumeric nor whitespace (for example `//@` or `//-`).
///
/// A bare `//` (nothing after the slashes) and anything not starting with `//` yield `false`.
/// Note that `///` and `//!` also satisfy this predicate, so callers that want to tell doc
/// comments apart must test for those first.
fn is_custom_comment(comment: &str) -> bool {
    comment
        .strip_prefix("//")
        .and_then(|rest| rest.chars().next())
        .map_or(false, |c| !(c.is_alphanumeric() || c.is_whitespace()))
}
40
/// The syntactic flavour of a comment, which determines the opener, closer and per-line
/// prefix used when the comment is rewritten.
#[derive(Copy, Clone, PartialEq, Eq)]
pub(crate) enum CommentStyle<'a> {
    /// Line comment opened with `// `.
    DoubleSlash,
    /// Line doc comment opened with `/// `.
    TripleSlash,
    /// Line doc comment opened with `//! `.
    Doc,
    /// Block comment opened with `/* `.
    SingleBullet,
    /// Block comment opened with `/** `.
    DoubleBullet,
    /// Block comment opened with `/*! `.
    Exclamation,
    /// Line comment with a custom opener (e.g. `//@ `); the payload is the opener itself.
    Custom(&'a str),
}
51
/// Extracts the opener of a custom comment from `s`.
///
/// Only the first line of `s` is considered. The opener is everything up to and including
/// the first space on that line, or the whole line when it contains no space. An empty
/// input yields an empty opener.
fn custom_opener(s: &str) -> &str {
    let first_line = match s.lines().next() {
        Some(line) => line,
        None => return "",
    };
    match first_line.find(' ') {
        Some(space_index) => &first_line[..=space_index],
        None => first_line,
    }
}
59
60 impl<'a> CommentStyle<'a> {
61 /// Returns `true` if the commenting style covers a line only.
is_line_comment(&self) -> bool62 pub(crate) fn is_line_comment(&self) -> bool {
63 match *self {
64 CommentStyle::DoubleSlash
65 | CommentStyle::TripleSlash
66 | CommentStyle::Doc
67 | CommentStyle::Custom(_) => true,
68 _ => false,
69 }
70 }
71
72 /// Returns `true` if the commenting style can span over multiple lines.
is_block_comment(&self) -> bool73 pub(crate) fn is_block_comment(&self) -> bool {
74 match *self {
75 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
76 true
77 }
78 _ => false,
79 }
80 }
81
82 /// Returns `true` if the commenting style is for documentation.
is_doc_comment(&self) -> bool83 pub(crate) fn is_doc_comment(&self) -> bool {
84 matches!(*self, CommentStyle::TripleSlash | CommentStyle::Doc)
85 }
86
opener(&self) -> &'a str87 pub(crate) fn opener(&self) -> &'a str {
88 match *self {
89 CommentStyle::DoubleSlash => "// ",
90 CommentStyle::TripleSlash => "/// ",
91 CommentStyle::Doc => "//! ",
92 CommentStyle::SingleBullet => "/* ",
93 CommentStyle::DoubleBullet => "/** ",
94 CommentStyle::Exclamation => "/*! ",
95 CommentStyle::Custom(opener) => opener,
96 }
97 }
98
closer(&self) -> &'a str99 pub(crate) fn closer(&self) -> &'a str {
100 match *self {
101 CommentStyle::DoubleSlash
102 | CommentStyle::TripleSlash
103 | CommentStyle::Custom(..)
104 | CommentStyle::Doc => "",
105 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
106 " */"
107 }
108 }
109 }
110
line_start(&self) -> &'a str111 pub(crate) fn line_start(&self) -> &'a str {
112 match *self {
113 CommentStyle::DoubleSlash => "// ",
114 CommentStyle::TripleSlash => "/// ",
115 CommentStyle::Doc => "//! ",
116 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
117 " * "
118 }
119 CommentStyle::Custom(opener) => opener,
120 }
121 }
122
to_str_tuplet(&self) -> (&'a str, &'a str, &'a str)123 pub(crate) fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
124 (self.opener(), self.closer(), self.line_start())
125 }
126 }
127
comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_>128 pub(crate) fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_> {
129 if !normalize_comments {
130 if orig.starts_with("/**") && !orig.starts_with("/**/") {
131 CommentStyle::DoubleBullet
132 } else if orig.starts_with("/*!") {
133 CommentStyle::Exclamation
134 } else if orig.starts_with("/*") {
135 CommentStyle::SingleBullet
136 } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
137 CommentStyle::TripleSlash
138 } else if orig.starts_with("//!") {
139 CommentStyle::Doc
140 } else if is_custom_comment(orig) {
141 CommentStyle::Custom(custom_opener(orig))
142 } else {
143 CommentStyle::DoubleSlash
144 }
145 } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
146 || (orig.starts_with("/**") && !orig.starts_with("/**/"))
147 {
148 CommentStyle::TripleSlash
149 } else if orig.starts_with("//!") || orig.starts_with("/*!") {
150 CommentStyle::Doc
151 } else if is_custom_comment(orig) {
152 CommentStyle::Custom(custom_opener(orig))
153 } else {
154 CommentStyle::DoubleSlash
155 }
156 }
157
/// Returns `true` if `s`, ignoring trailing whitespace, ends with the block-comment
/// closer `*/`.
pub(crate) fn is_last_comment_block(s: &str) -> bool {
    s.trim_end().strip_suffix("*/").is_some()
}
162
163 /// Combine `prev_str` and `next_str` into a single `String`. `span` may contain
164 /// comments between two strings. If there are such comments, then that will be
165 /// recovered. If `allow_extend` is true and there is no comment between the two
166 /// strings, then they will be put on a single line as long as doing so does not
167 /// exceed max width.
combine_strs_with_missing_comments( context: &RewriteContext<'_>, prev_str: &str, next_str: &str, span: Span, shape: Shape, allow_extend: bool, ) -> Option<String>168 pub(crate) fn combine_strs_with_missing_comments(
169 context: &RewriteContext<'_>,
170 prev_str: &str,
171 next_str: &str,
172 span: Span,
173 shape: Shape,
174 allow_extend: bool,
175 ) -> Option<String> {
176 trace!(
177 "combine_strs_with_missing_comments `{}` `{}` {:?} {:?}",
178 prev_str,
179 next_str,
180 span,
181 shape
182 );
183
184 let mut result =
185 String::with_capacity(prev_str.len() + next_str.len() + shape.indent.width() + 128);
186 result.push_str(prev_str);
187 let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
188 let first_sep =
189 if prev_str.is_empty() || next_str.is_empty() || trimmed_last_line_width(prev_str) == 0 {
190 ""
191 } else {
192 " "
193 };
194 let mut one_line_width =
195 last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();
196
197 let config = context.config;
198 let indent = shape.indent;
199 let missing_comment = rewrite_missing_comment(span, shape, context)?;
200
201 if missing_comment.is_empty() {
202 if allow_extend && one_line_width <= shape.width {
203 result.push_str(first_sep);
204 } else if !prev_str.is_empty() {
205 result.push_str(&indent.to_string_with_newline(config))
206 }
207 result.push_str(next_str);
208 return Some(result);
209 }
210
211 // We have a missing comment between the first expression and the second expression.
212
213 // Peek the the original source code and find out whether there is a newline between the first
214 // expression and the second expression or the missing comment. We will preserve the original
215 // layout whenever possible.
216 let original_snippet = context.snippet(span);
217 let prefer_same_line = if let Some(pos) = original_snippet.find('/') {
218 !original_snippet[..pos].contains('\n')
219 } else {
220 !original_snippet.contains('\n')
221 };
222
223 one_line_width -= first_sep.len();
224 let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
225 Cow::from("")
226 } else {
227 let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
228 if prefer_same_line && one_line_width <= shape.width {
229 Cow::from(" ")
230 } else {
231 indent.to_string_with_newline(config)
232 }
233 };
234 result.push_str(&first_sep);
235 result.push_str(&missing_comment);
236
237 let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
238 Cow::from("")
239 } else if missing_comment.starts_with("//") {
240 indent.to_string_with_newline(config)
241 } else {
242 one_line_width += missing_comment.len() + first_sep.len() + 1;
243 allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
244 if prefer_same_line && allow_one_line && one_line_width <= shape.width {
245 Cow::from(" ")
246 } else {
247 indent.to_string_with_newline(config)
248 }
249 };
250 result.push_str(&second_sep);
251 result.push_str(next_str);
252
253 Some(result)
254 }
255
rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> Option<String>256 pub(crate) fn rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> Option<String> {
257 identify_comment(orig, false, shape, config, true)
258 }
259
rewrite_comment( orig: &str, block_style: bool, shape: Shape, config: &Config, ) -> Option<String>260 pub(crate) fn rewrite_comment(
261 orig: &str,
262 block_style: bool,
263 shape: Shape,
264 config: &Config,
265 ) -> Option<String> {
266 identify_comment(orig, block_style, shape, config, false)
267 }
268
identify_comment( orig: &str, block_style: bool, shape: Shape, config: &Config, is_doc_comment: bool, ) -> Option<String>269 fn identify_comment(
270 orig: &str,
271 block_style: bool,
272 shape: Shape,
273 config: &Config,
274 is_doc_comment: bool,
275 ) -> Option<String> {
276 let style = comment_style(orig, false);
277
278 // Computes the byte length of line taking into account a newline if the line is part of a
279 // paragraph.
280 fn compute_len(orig: &str, line: &str) -> usize {
281 if orig.len() > line.len() {
282 if orig.as_bytes()[line.len()] == b'\r' {
283 line.len() + 2
284 } else {
285 line.len() + 1
286 }
287 } else {
288 line.len()
289 }
290 }
291
292 // Get the first group of line comments having the same commenting style.
293 //
294 // Returns a tuple with:
295 // - a boolean indicating if there is a blank line
296 // - a number indicating the size of the first group of comments
297 fn consume_same_line_comments(
298 style: CommentStyle<'_>,
299 orig: &str,
300 line_start: &str,
301 ) -> (bool, usize) {
302 let mut first_group_ending = 0;
303 let mut hbl = false;
304
305 for line in orig.lines() {
306 let trimmed_line = line.trim_start();
307 if trimmed_line.is_empty() {
308 hbl = true;
309 break;
310 } else if trimmed_line.starts_with(line_start)
311 || comment_style(trimmed_line, false) == style
312 {
313 first_group_ending += compute_len(&orig[first_group_ending..], line);
314 } else {
315 break;
316 }
317 }
318 (hbl, first_group_ending)
319 }
320
321 let (has_bare_lines, first_group_ending) = match style {
322 CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
323 let line_start = style.line_start().trim_start();
324 consume_same_line_comments(style, orig, line_start)
325 }
326 CommentStyle::Custom(opener) => {
327 let trimmed_opener = opener.trim_end();
328 consume_same_line_comments(style, orig, trimmed_opener)
329 }
330 // for a block comment, search for the closing symbol
331 CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
332 let closer = style.closer().trim_start();
333 let mut count = orig.matches(closer).count();
334 let mut closing_symbol_offset = 0;
335 let mut hbl = false;
336 let mut first = true;
337 for line in orig.lines() {
338 closing_symbol_offset += compute_len(&orig[closing_symbol_offset..], line);
339 let mut trimmed_line = line.trim_start();
340 if !trimmed_line.starts_with('*')
341 && !trimmed_line.starts_with("//")
342 && !trimmed_line.starts_with("/*")
343 {
344 hbl = true;
345 }
346
347 // Remove opener from consideration when searching for closer
348 if first {
349 let opener = style.opener().trim_end();
350 trimmed_line = &trimmed_line[opener.len()..];
351 first = false;
352 }
353 if trimmed_line.ends_with(closer) {
354 count -= 1;
355 if count == 0 {
356 break;
357 }
358 }
359 }
360 (hbl, closing_symbol_offset)
361 }
362 };
363
364 let (first_group, rest) = orig.split_at(first_group_ending);
365 let rewritten_first_group =
366 if !config.normalize_comments() && has_bare_lines && style.is_block_comment() {
367 trim_left_preserve_layout(first_group, shape.indent, config)?
368 } else if !config.normalize_comments()
369 && !config.wrap_comments()
370 && !config.format_code_in_doc_comments()
371 {
372 light_rewrite_comment(first_group, shape.indent, config, is_doc_comment)
373 } else {
374 rewrite_comment_inner(
375 first_group,
376 block_style,
377 style,
378 shape,
379 config,
380 is_doc_comment || style.is_doc_comment(),
381 )?
382 };
383 if rest.is_empty() {
384 Some(rewritten_first_group)
385 } else {
386 identify_comment(
387 rest.trim_start(),
388 block_style,
389 shape,
390 config,
391 is_doc_comment,
392 )
393 .map(|rest_str| {
394 format!(
395 "{}\n{}{}{}",
396 rewritten_first_group,
397 // insert back the blank line
398 if has_bare_lines && style.is_line_comment() {
399 "\n"
400 } else {
401 ""
402 },
403 shape.indent.to_string(config),
404 rest_str
405 )
406 })
407 }
408 }
409
/// Enum indicating if the code block contains rust based on attributes
enum CodeBlockAttribute {
    /// Every attribute of the code fence is compatible with Rust code.
    Rust,
    /// At least one attribute marks the block as something other than plain Rust.
    NotRust,
}

impl CodeBlockAttribute {
    /// Parse comma separated attributes list. Return rust only if all
    /// attributes are valid rust attributes
    /// See <https://doc.rust-lang.org/rustdoc/print.html#attributes>
    ///
    /// Each attribute is trimmed before it is compared; empty attributes (and therefore an
    /// empty list) count as Rust.
    fn new(attributes: &str) -> CodeBlockAttribute {
        let all_rust = attributes.split(',').all(|attribute| {
            matches!(
                attribute.trim(),
                "" | "rust"
                    | "should_panic"
                    | "no_run"
                    | "edition2015"
                    | "edition2018"
                    | "edition2021"
            )
        });
        if all_rust {
            CodeBlockAttribute::Rust
        } else {
            CodeBlockAttribute::NotRust
        }
    }
}
432
433 /// Block that is formatted as an item.
434 ///
435 /// An item starts with either a star `*`, a dash `-`, a greater-than `>`, a plus '+', or a number
436 /// `12.` or `34)` (with at most 2 digits). An item represents CommonMark's ["list
437 /// items"](https://spec.commonmark.org/0.30/#list-items) and/or ["block
438 /// quotes"](https://spec.commonmark.org/0.30/#block-quotes), but note that only a subset of
439 /// CommonMark is recognized - see the doc comment of [`ItemizedBlock::get_marker_length`] for more
440 /// details.
441 ///
442 /// Different level of indentation are handled by shrinking the shape accordingly.
443 struct ItemizedBlock {
444 /// the lines that are identified as part of an itemized block
445 lines: Vec<String>,
446 /// the number of characters (typically whitespaces) up to the item marker
447 indent: usize,
448 /// the string that marks the start of an item
449 opener: String,
450 /// sequence of characters (typically whitespaces) to prefix new lines that are part of the item
451 line_start: String,
452 }
453
454 impl ItemizedBlock {
455 /// Checks whether the `trimmed` line includes an item marker. Returns `None` if there is no
456 /// marker. Returns the length of the marker (in bytes) if one is present. Note that the length
457 /// includes the whitespace that follows the marker, for example the marker in `"* list item"`
458 /// has the length of 2.
459 ///
460 /// This function recognizes item markers that correspond to CommonMark's
461 /// ["bullet list marker"](https://spec.commonmark.org/0.30/#bullet-list-marker),
462 /// ["block quote marker"](https://spec.commonmark.org/0.30/#block-quote-marker), and/or
463 /// ["ordered list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker).
464 ///
465 /// Compared to CommonMark specification, the number of digits that are allowed in an ["ordered
466 /// list marker"](https://spec.commonmark.org/0.30/#ordered-list-marker) is more limited (to at
467 /// most 2 digits). Limiting the length of the marker helps reduce the risk of recognizing
468 /// arbitrary numbers as markers. See also
469 /// <https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990> which gives the
470 /// following example where a number (i.e. "1868") doesn't signify an ordered list:
471 /// ```md
472 /// The Captain died in
473 /// 1868. He wes buried in...
474 /// ```
get_marker_length(trimmed: &str) -> Option<usize>475 fn get_marker_length(trimmed: &str) -> Option<usize> {
476 // https://spec.commonmark.org/0.30/#bullet-list-marker or
477 // https://spec.commonmark.org/0.30/#block-quote-marker
478 let itemized_start = ["* ", "- ", "> ", "+ "];
479 if itemized_start.iter().any(|s| trimmed.starts_with(s)) {
480 return Some(2); // All items in `itemized_start` have length 2.
481 }
482
483 // https://spec.commonmark.org/0.30/#ordered-list-marker, where at most 2 digits are
484 // allowed.
485 for suffix in [". ", ") "] {
486 if let Some((prefix, _)) = trimmed.split_once(suffix) {
487 if prefix.len() <= 2 && prefix.chars().all(|c| char::is_ascii_digit(&c)) {
488 return Some(prefix.len() + suffix.len());
489 }
490 }
491 }
492
493 None // No markers found.
494 }
495
496 /// Creates a new `ItemizedBlock` described with the given `line`.
497 /// Returns `None` if `line` doesn't start an item.
new(line: &str) -> Option<ItemizedBlock>498 fn new(line: &str) -> Option<ItemizedBlock> {
499 let marker_length = ItemizedBlock::get_marker_length(line.trim_start())?;
500 let space_to_marker = line.chars().take_while(|c| c.is_whitespace()).count();
501 let mut indent = space_to_marker + marker_length;
502 let mut line_start = " ".repeat(indent);
503
504 // Markdown blockquote start with a "> "
505 if line.trim_start().starts_with(">") {
506 // remove the original +2 indent because there might be multiple nested block quotes
507 // and it's easier to reason about the final indent by just taking the length
508 // of the new line_start. We update the indent because it effects the max width
509 // of each formatted line.
510 line_start = itemized_block_quote_start(line, line_start, 2);
511 indent = line_start.len();
512 }
513 Some(ItemizedBlock {
514 lines: vec![line[indent..].to_string()],
515 indent,
516 opener: line[..indent].to_string(),
517 line_start,
518 })
519 }
520
521 /// Returns a `StringFormat` used for formatting the content of an item.
create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a>522 fn create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a> {
523 StringFormat {
524 opener: "",
525 closer: "",
526 line_start: "",
527 line_end: "",
528 shape: Shape::legacy(fmt.shape.width.saturating_sub(self.indent), Indent::empty()),
529 trim_end: true,
530 config: fmt.config,
531 }
532 }
533
534 /// Returns `true` if the line is part of the current itemized block.
535 /// If it is, then it is added to the internal lines list.
add_line(&mut self, line: &str) -> bool536 fn add_line(&mut self, line: &str) -> bool {
537 if ItemizedBlock::get_marker_length(line.trim_start()).is_none()
538 && self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
539 {
540 self.lines.push(line.to_string());
541 return true;
542 }
543 false
544 }
545
546 /// Returns the block as a string, with each line trimmed at the start.
trimmed_block_as_string(&self) -> String547 fn trimmed_block_as_string(&self) -> String {
548 self.lines
549 .iter()
550 .map(|line| format!("{} ", line.trim_start()))
551 .collect::<String>()
552 }
553
554 /// Returns the block as a string under its original form.
original_block_as_string(&self) -> String555 fn original_block_as_string(&self) -> String {
556 self.lines.join("\n")
557 }
558 }
559
/// Determine the line_start when formatting markdown block quotes.
/// The original line_start likely contains indentation (whitespaces), which we'd like to
/// replace with '> ' characters.
///
/// The quote level is the number of `>` characters found before the first alphanumeric
/// character of `line`. The last `remove_indent` characters of `line_start` are dropped
/// and one `"> "` per quote level is appended.
fn itemized_block_quote_start(line: &str, mut line_start: String, remove_indent: usize) -> String {
    let quote_level = line
        .chars()
        .take_while(|c| !c.is_alphanumeric())
        .filter(|&c| c == '>')
        .count();

    // Popping from an already empty string is a no-op, so over-long removals are harmless.
    for _ in 0..remove_indent {
        line_start.pop();
    }

    line_start.push_str(&"> ".repeat(quote_level));
    line_start
}
578
579 struct CommentRewrite<'a> {
580 result: String,
581 code_block_buffer: String,
582 is_prev_line_multi_line: bool,
583 code_block_attr: Option<CodeBlockAttribute>,
584 item_block: Option<ItemizedBlock>,
585 comment_line_separator: String,
586 indent_str: String,
587 max_width: usize,
588 fmt_indent: Indent,
589 fmt: StringFormat<'a>,
590
591 opener: String,
592 closer: String,
593 line_start: String,
594 style: CommentStyle<'a>,
595 }
596
597 impl<'a> CommentRewrite<'a> {
new( orig: &'a str, block_style: bool, shape: Shape, config: &'a Config, ) -> CommentRewrite<'a>598 fn new(
599 orig: &'a str,
600 block_style: bool,
601 shape: Shape,
602 config: &'a Config,
603 ) -> CommentRewrite<'a> {
604 let ((opener, closer, line_start), style) = if block_style {
605 (
606 CommentStyle::SingleBullet.to_str_tuplet(),
607 CommentStyle::SingleBullet,
608 )
609 } else {
610 let style = comment_style(orig, config.normalize_comments());
611 (style.to_str_tuplet(), style)
612 };
613
614 let max_width = shape
615 .width
616 .checked_sub(closer.len() + opener.len())
617 .unwrap_or(1);
618 let indent_str = shape.indent.to_string_with_newline(config).to_string();
619
620 let mut cr = CommentRewrite {
621 result: String::with_capacity(orig.len() * 2),
622 code_block_buffer: String::with_capacity(128),
623 is_prev_line_multi_line: false,
624 code_block_attr: None,
625 item_block: None,
626 comment_line_separator: format!("{}{}", indent_str, line_start),
627 max_width,
628 indent_str,
629 fmt_indent: shape.indent,
630
631 fmt: StringFormat {
632 opener: "",
633 closer: "",
634 line_start,
635 line_end: "",
636 shape: Shape::legacy(max_width, shape.indent),
637 trim_end: true,
638 config,
639 },
640
641 opener: opener.to_owned(),
642 closer: closer.to_owned(),
643 line_start: line_start.to_owned(),
644 style,
645 };
646 cr.result.push_str(opener);
647 cr
648 }
649
join_block(s: &str, sep: &str) -> String650 fn join_block(s: &str, sep: &str) -> String {
651 let mut result = String::with_capacity(s.len() + 128);
652 let mut iter = s.lines().peekable();
653 while let Some(line) = iter.next() {
654 result.push_str(line);
655 result.push_str(match iter.peek() {
656 Some(next_line) if next_line.is_empty() => sep.trim_end(),
657 Some(..) => sep,
658 None => "",
659 });
660 }
661 result
662 }
663
664 /// Check if any characters were written to the result buffer after the start of the comment.
665 /// when calling [`CommentRewrite::new()`] the result buffer is initiazlied with the opening
666 /// characters for the comment.
buffer_contains_comment(&self) -> bool667 fn buffer_contains_comment(&self) -> bool {
668 // if self.result.len() < self.opener.len() then an empty comment is in the buffer
669 // if self.result.len() > self.opener.len() then a non empty comment is in the buffer
670 self.result.len() != self.opener.len()
671 }
672
finish(mut self) -> String673 fn finish(mut self) -> String {
674 if !self.code_block_buffer.is_empty() {
675 // There is a code block that is not properly enclosed by backticks.
676 // We will leave them untouched.
677 self.result.push_str(&self.comment_line_separator);
678 self.result.push_str(&Self::join_block(
679 &trim_custom_comment_prefix(&self.code_block_buffer),
680 &self.comment_line_separator,
681 ));
682 }
683
684 if let Some(ref ib) = self.item_block {
685 // the last few lines are part of an itemized block
686 self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
687 let item_fmt = ib.create_string_format(&self.fmt);
688
689 // only push a comment_line_separator for ItemizedBlocks if the comment is not empty
690 if self.buffer_contains_comment() {
691 self.result.push_str(&self.comment_line_separator);
692 }
693
694 self.result.push_str(&ib.opener);
695 match rewrite_string(
696 &ib.trimmed_block_as_string(),
697 &item_fmt,
698 self.max_width.saturating_sub(ib.indent),
699 ) {
700 Some(s) => self.result.push_str(&Self::join_block(
701 &s,
702 &format!("{}{}", self.comment_line_separator, ib.line_start),
703 )),
704 None => self.result.push_str(&Self::join_block(
705 &ib.original_block_as_string(),
706 &self.comment_line_separator,
707 )),
708 };
709 }
710
711 self.result.push_str(&self.closer);
712 if self.result.ends_with(&self.opener) && self.opener.ends_with(' ') {
713 // Trailing space.
714 self.result.pop();
715 }
716
717 self.result
718 }
719
handle_line( &mut self, orig: &'a str, i: usize, line: &'a str, has_leading_whitespace: bool, is_doc_comment: bool, ) -> bool720 fn handle_line(
721 &mut self,
722 orig: &'a str,
723 i: usize,
724 line: &'a str,
725 has_leading_whitespace: bool,
726 is_doc_comment: bool,
727 ) -> bool {
728 let num_newlines = count_newlines(orig);
729 let is_last = i == num_newlines;
730 let needs_new_comment_line = if self.style.is_block_comment() {
731 num_newlines > 0 || self.buffer_contains_comment()
732 } else {
733 self.buffer_contains_comment()
734 };
735
736 if let Some(ref mut ib) = self.item_block {
737 if ib.add_line(line) {
738 return false;
739 }
740 self.is_prev_line_multi_line = false;
741 self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
742 let item_fmt = ib.create_string_format(&self.fmt);
743
744 // only push a comment_line_separator if we need to start a new comment line
745 if needs_new_comment_line {
746 self.result.push_str(&self.comment_line_separator);
747 }
748
749 self.result.push_str(&ib.opener);
750 match rewrite_string(
751 &ib.trimmed_block_as_string(),
752 &item_fmt,
753 self.max_width.saturating_sub(ib.indent),
754 ) {
755 Some(s) => self.result.push_str(&Self::join_block(
756 &s,
757 &format!("{}{}", self.comment_line_separator, ib.line_start),
758 )),
759 None => self.result.push_str(&Self::join_block(
760 &ib.original_block_as_string(),
761 &self.comment_line_separator,
762 )),
763 };
764 } else if self.code_block_attr.is_some() {
765 if line.starts_with("```") {
766 let code_block = match self.code_block_attr.as_ref().unwrap() {
767 CodeBlockAttribute::Rust
768 if self.fmt.config.format_code_in_doc_comments()
769 && !self.code_block_buffer.trim().is_empty() =>
770 {
771 let mut config = self.fmt.config.clone();
772 config.set().wrap_comments(false);
773 let comment_max_width = config
774 .doc_comment_code_block_width()
775 .min(config.max_width());
776 config.set().max_width(comment_max_width);
777 if let Some(s) =
778 crate::format_code_block(&self.code_block_buffer, &config, false)
779 {
780 trim_custom_comment_prefix(&s.snippet)
781 } else {
782 trim_custom_comment_prefix(&self.code_block_buffer)
783 }
784 }
785 _ => trim_custom_comment_prefix(&self.code_block_buffer),
786 };
787 if !code_block.is_empty() {
788 self.result.push_str(&self.comment_line_separator);
789 self.result
790 .push_str(&Self::join_block(&code_block, &self.comment_line_separator));
791 }
792 self.code_block_buffer.clear();
793 self.result.push_str(&self.comment_line_separator);
794 self.result.push_str(line);
795 self.code_block_attr = None;
796 } else {
797 self.code_block_buffer
798 .push_str(&hide_sharp_behind_comment(line));
799 self.code_block_buffer.push('\n');
800 }
801 return false;
802 }
803
804 self.code_block_attr = None;
805 self.item_block = None;
806 if let Some(stripped) = line.strip_prefix("```") {
807 self.code_block_attr = Some(CodeBlockAttribute::new(stripped))
808 } else if self.fmt.config.wrap_comments() {
809 if let Some(ib) = ItemizedBlock::new(line) {
810 self.item_block = Some(ib);
811 return false;
812 }
813 }
814
815 if self.result == self.opener {
816 let force_leading_whitespace = &self.opener == "/* " && count_newlines(orig) == 0;
817 if !has_leading_whitespace && !force_leading_whitespace && self.result.ends_with(' ') {
818 self.result.pop();
819 }
820 if line.is_empty() {
821 return false;
822 }
823 } else if self.is_prev_line_multi_line && !line.is_empty() {
824 self.result.push(' ')
825 } else if is_last && line.is_empty() {
826 // trailing blank lines are unwanted
827 if !self.closer.is_empty() {
828 self.result.push_str(&self.indent_str);
829 }
830 return true;
831 } else {
832 self.result.push_str(&self.comment_line_separator);
833 if !has_leading_whitespace && self.result.ends_with(' ') {
834 self.result.pop();
835 }
836 }
837
838 let is_markdown_header_doc_comment = is_doc_comment && line.starts_with("#");
839
840 // We only want to wrap the comment if:
841 // 1) wrap_comments = true is configured
842 // 2) The comment is not the start of a markdown header doc comment
843 // 3) The comment width exceeds the shape's width
844 // 4) No URLS were found in the comment
845 // If this changes, the documentation in ../Configurations.md#wrap_comments
846 // should be changed accordingly.
847 let should_wrap_comment = self.fmt.config.wrap_comments()
848 && !is_markdown_header_doc_comment
849 && unicode_str_width(line) > self.fmt.shape.width
850 && !has_url(line)
851 && !is_table_item(line);
852
853 if should_wrap_comment {
854 match rewrite_string(line, &self.fmt, self.max_width) {
855 Some(ref s) => {
856 self.is_prev_line_multi_line = s.contains('\n');
857 self.result.push_str(s);
858 }
859 None if self.is_prev_line_multi_line => {
860 // We failed to put the current `line` next to the previous `line`.
861 // Remove the trailing space, then start rewrite on the next line.
862 self.result.pop();
863 self.result.push_str(&self.comment_line_separator);
864 self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
865 match rewrite_string(line, &self.fmt, self.max_width) {
866 Some(ref s) => {
867 self.is_prev_line_multi_line = s.contains('\n');
868 self.result.push_str(s);
869 }
870 None => {
871 self.is_prev_line_multi_line = false;
872 self.result.push_str(line);
873 }
874 }
875 }
876 None => {
877 self.is_prev_line_multi_line = false;
878 self.result.push_str(line);
879 }
880 }
881
882 self.fmt.shape = if self.is_prev_line_multi_line {
883 // 1 = " "
884 let offset = 1 + last_line_width(&self.result) - self.line_start.len();
885 Shape {
886 width: self.max_width.saturating_sub(offset),
887 indent: self.fmt_indent,
888 offset: self.fmt.shape.offset + offset,
889 }
890 } else {
891 Shape::legacy(self.max_width, self.fmt_indent)
892 };
893 } else {
894 if line.is_empty() && self.result.ends_with(' ') && !is_last {
895 // Remove space if this is an empty comment or a doc comment.
896 self.result.pop();
897 }
898 self.result.push_str(line);
899 self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
900 self.is_prev_line_multi_line = false;
901 }
902
903 false
904 }
905 }
906
rewrite_comment_inner( orig: &str, block_style: bool, style: CommentStyle<'_>, shape: Shape, config: &Config, is_doc_comment: bool, ) -> Option<String>907 fn rewrite_comment_inner(
908 orig: &str,
909 block_style: bool,
910 style: CommentStyle<'_>,
911 shape: Shape,
912 config: &Config,
913 is_doc_comment: bool,
914 ) -> Option<String> {
915 let mut rewriter = CommentRewrite::new(orig, block_style, shape, config);
916
917 let line_breaks = count_newlines(orig.trim_end());
918 let lines = orig
919 .lines()
920 .enumerate()
921 .map(|(i, mut line)| {
922 line = trim_end_unless_two_whitespaces(line.trim_start(), is_doc_comment);
923 // Drop old closer.
924 if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
925 line = line[..(line.len() - 2)].trim_end();
926 }
927
928 line
929 })
930 .map(|s| left_trim_comment_line(s, &style))
931 .map(|(line, has_leading_whitespace)| {
932 if orig.starts_with("/*") && line_breaks == 0 {
933 (
934 line.trim_start(),
935 has_leading_whitespace || config.normalize_comments(),
936 )
937 } else {
938 (line, has_leading_whitespace || config.normalize_comments())
939 }
940 });
941
942 for (i, (line, has_leading_whitespace)) in lines.enumerate() {
943 if rewriter.handle_line(orig, i, line, has_leading_whitespace, is_doc_comment) {
944 break;
945 }
946 }
947
948 Some(rewriter.finish())
949 }
950
/// Marker that `hide_sharp_behind_comment` prepends to lines starting with `#`, and that
/// `trim_custom_comment_prefix` strips again.
const RUSTFMT_CUSTOM_COMMENT_PREFIX: &str = "//#### ";
952
hide_sharp_behind_comment(s: &str) -> Cow<'_, str>953 fn hide_sharp_behind_comment(s: &str) -> Cow<'_, str> {
954 let s_trimmed = s.trim();
955 if s_trimmed.starts_with("# ") || s_trimmed == "#" {
956 Cow::from(format!("{}{}", RUSTFMT_CUSTOM_COMMENT_PREFIX, s))
957 } else {
958 Cow::from(s)
959 }
960 }
961
trim_custom_comment_prefix(s: &str) -> String962 fn trim_custom_comment_prefix(s: &str) -> String {
963 s.lines()
964 .map(|line| {
965 let left_trimmed = line.trim_start();
966 if left_trimmed.starts_with(RUSTFMT_CUSTOM_COMMENT_PREFIX) {
967 left_trimmed.trim_start_matches(RUSTFMT_CUSTOM_COMMENT_PREFIX)
968 } else {
969 line
970 }
971 })
972 .collect::<Vec<_>>()
973 .join("\n")
974 }
975
976 /// Returns `true` if the given string MAY include URLs or alike.
has_url(s: &str) -> bool977 fn has_url(s: &str) -> bool {
978 // This function may return false positive, but should get its job done in most cases.
979 s.contains("https://")
980 || s.contains("http://")
981 || s.contains("ftp://")
982 || s.contains("file://")
983 || REFERENCE_LINK_URL.is_match(s)
984 }
985
/// Returns `true` if the given string may be part of a Markdown table.
///
/// The heuristic: after leading whitespace the string must start with `|` and contain at least
/// one more `|` further to the right. False positives are possible and accepted.
fn is_table_item(mut s: &str) -> bool {
    s = s.trim_start();
    if !s.starts_with('|') {
        return false;
    }
    // The right-most delimiter must not be the leading one.
    matches!(s.rfind('|'), Some(last) if last > 0)
}
997
998 /// Given the span, rewrite the missing comment inside it if available.
999 /// Note that the given span must only include comments (or leading/trailing whitespaces).
rewrite_missing_comment( span: Span, shape: Shape, context: &RewriteContext<'_>, ) -> Option<String>1000 pub(crate) fn rewrite_missing_comment(
1001 span: Span,
1002 shape: Shape,
1003 context: &RewriteContext<'_>,
1004 ) -> Option<String> {
1005 let missing_snippet = context.snippet(span);
1006 let trimmed_snippet = missing_snippet.trim();
1007 // check the span starts with a comment
1008 let pos = trimmed_snippet.find('/');
1009 if !trimmed_snippet.is_empty() && pos.is_some() {
1010 rewrite_comment(trimmed_snippet, false, shape, context.config)
1011 } else {
1012 Some(String::new())
1013 }
1014 }
1015
1016 /// Recover the missing comments in the specified span, if available.
1017 /// The layout of the comments will be preserved as long as it does not break the code
1018 /// and its total width does not exceed the max width.
recover_missing_comment_in_span( span: Span, shape: Shape, context: &RewriteContext<'_>, used_width: usize, ) -> Option<String>1019 pub(crate) fn recover_missing_comment_in_span(
1020 span: Span,
1021 shape: Shape,
1022 context: &RewriteContext<'_>,
1023 used_width: usize,
1024 ) -> Option<String> {
1025 let missing_comment = rewrite_missing_comment(span, shape, context)?;
1026 if missing_comment.is_empty() {
1027 Some(String::new())
1028 } else {
1029 let missing_snippet = context.snippet(span);
1030 let pos = missing_snippet.find('/')?;
1031 // 1 = ` `
1032 let total_width = missing_comment.len() + used_width + 1;
1033 let force_new_line_before_comment =
1034 missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
1035 let sep = if force_new_line_before_comment {
1036 shape.indent.to_string_with_newline(context.config)
1037 } else {
1038 Cow::from(" ")
1039 };
1040 Some(format!("{}{}", sep, missing_comment))
1041 }
1042 }
1043
/// Trims trailing whitespace unless it ends in two or more spaces.
///
/// In a doc comment, a line ending in two spaces is Markdown's hard line break, so such a line
/// is returned untouched. Every other line, and every line of a non-doc comment, has all of its
/// trailing whitespace removed. A single trailing space carries no Markdown meaning and is
/// trimmed like any other trailing whitespace.
fn trim_end_unless_two_whitespaces(s: &str, is_doc_comment: bool) -> &str {
    if is_doc_comment && s.ends_with("  ") {
        s
    } else {
        s.trim_end()
    }
}
1052
1053 /// Trims whitespace and aligns to indent, but otherwise does not change comments.
light_rewrite_comment( orig: &str, offset: Indent, config: &Config, is_doc_comment: bool, ) -> String1054 fn light_rewrite_comment(
1055 orig: &str,
1056 offset: Indent,
1057 config: &Config,
1058 is_doc_comment: bool,
1059 ) -> String {
1060 let lines: Vec<&str> = orig
1061 .lines()
1062 .map(|l| {
1063 // This is basically just l.trim(), but in the case that a line starts
1064 // with `*` we want to leave one space before it, so it aligns with the
1065 // `*` in `/*`.
1066 let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
1067 let left_trimmed = if let Some(fnw) = first_non_whitespace {
1068 if l.as_bytes()[fnw] == b'*' && fnw > 0 {
1069 &l[fnw - 1..]
1070 } else {
1071 &l[fnw..]
1072 }
1073 } else {
1074 ""
1075 };
1076 // Preserve markdown's double-space line break syntax in doc comment.
1077 trim_end_unless_two_whitespaces(left_trimmed, is_doc_comment)
1078 })
1079 .collect();
1080 lines.join(&format!("\n{}", offset.to_string(config)))
1081 }
1082
1083 /// Trims comment characters and possibly a single space from the left of a string.
1084 /// Does not trim all whitespace. If a single space is trimmed from the left of the string,
1085 /// this function returns true.
left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool)1086 fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool) {
1087 if line.starts_with("//! ")
1088 || line.starts_with("/// ")
1089 || line.starts_with("/*! ")
1090 || line.starts_with("/** ")
1091 {
1092 (&line[4..], true)
1093 } else if let CommentStyle::Custom(opener) = *style {
1094 if let Some(stripped) = line.strip_prefix(opener) {
1095 (stripped, true)
1096 } else {
1097 (&line[opener.trim_end().len()..], false)
1098 }
1099 } else if line.starts_with("/* ")
1100 || line.starts_with("// ")
1101 || line.starts_with("//!")
1102 || line.starts_with("///")
1103 || line.starts_with("** ")
1104 || line.starts_with("/*!")
1105 || (line.starts_with("/**") && !line.starts_with("/**/"))
1106 {
1107 (&line[3..], line.chars().nth(2).unwrap() == ' ')
1108 } else if line.starts_with("/*")
1109 || line.starts_with("* ")
1110 || line.starts_with("//")
1111 || line.starts_with("**")
1112 {
1113 (&line[2..], line.chars().nth(1).unwrap() == ' ')
1114 } else if let Some(stripped) = line.strip_prefix('*') {
1115 (stripped, false)
1116 } else {
1117 (line, line.starts_with(' '))
1118 }
1119 }
1120
/// Substring search that ignores text inside comments.
pub(crate) trait FindUncommented {
    /// Returns the byte offset of the first occurrence of `pat` that lies outside of comments,
    /// or `None` if there is none.
    fn find_uncommented(&self, pat: &str) -> Option<usize>;
    /// Returns the byte offset of the last occurrence of `pat` that lies outside of comments,
    /// or `None` if there is none.
    fn find_last_uncommented(&self, pat: &str) -> Option<usize>;
}
1125
1126 impl FindUncommented for str {
find_uncommented(&self, pat: &str) -> Option<usize>1127 fn find_uncommented(&self, pat: &str) -> Option<usize> {
1128 let mut needle_iter = pat.chars();
1129 for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
1130 match needle_iter.next() {
1131 None => {
1132 return Some(i - pat.len());
1133 }
1134 Some(c) => match kind {
1135 FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
1136 _ => {
1137 needle_iter = pat.chars();
1138 }
1139 },
1140 }
1141 }
1142
1143 // Handle case where the pattern is a suffix of the search string
1144 match needle_iter.next() {
1145 Some(_) => None,
1146 None => Some(self.len() - pat.len()),
1147 }
1148 }
1149
find_last_uncommented(&self, pat: &str) -> Option<usize>1150 fn find_last_uncommented(&self, pat: &str) -> Option<usize> {
1151 if let Some(left) = self.find_uncommented(pat) {
1152 let mut result = left;
1153 // add 1 to use find_last_uncommented for &str after pat
1154 while let Some(next) = self[(result + 1)..].find_last_uncommented(pat) {
1155 result += next + 1;
1156 }
1157 Some(result)
1158 } else {
1159 None
1160 }
1161 }
1162 }
1163
1164 // Returns the first byte position after the first comment. The given string
1165 // is expected to be prefixed by a comment, including delimiters.
1166 // Good: `/* /* inner */ outer */ code();`
1167 // Bad: `code(); // hello\n world!`
find_comment_end(s: &str) -> Option<usize>1168 pub(crate) fn find_comment_end(s: &str) -> Option<usize> {
1169 let mut iter = CharClasses::new(s.char_indices());
1170 for (kind, (i, _c)) in &mut iter {
1171 if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
1172 return Some(i);
1173 }
1174 }
1175
1176 // Handle case where the comment ends at the end of `s`.
1177 if iter.status == CharClassesStatus::Normal {
1178 Some(s.len())
1179 } else {
1180 None
1181 }
1182 }
1183
1184 /// Returns `true` if text contains any comment.
contains_comment(text: &str) -> bool1185 pub(crate) fn contains_comment(text: &str) -> bool {
1186 CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
1187 }
1188
/// Iterator adaptor that pairs every item of a character stream with the `FullCodeCharKind`
/// describing whether it is code, part of a string, or part of a comment.
pub(crate) struct CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    /// Underlying character stream, wrapped so that upcoming items can be peeked.
    base: MultiPeek<T>,
    /// Lexical state carried from one character to the next.
    status: CharClassesStatus,
}
1197
/// An item that carries a character, possibly alongside extra data such as its position.
pub(crate) trait RichChar {
    /// Returns the character carried by this item.
    fn get_char(&self) -> char;
}

/// A plain `char` is its own character.
impl RichChar for char {
    fn get_char(&self) -> char {
        let chr: char = *self;
        chr
    }
}

/// For `(index, char)` pairs, as produced by `char_indices`, the character is the second field.
impl RichChar for (usize, char) {
    fn get_char(&self) -> char {
        let &(_, chr) = self;
        chr
    }
}
1213
/// Lexical state of `CharClasses` between two characters.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    /// Plain code: not inside any string, char literal or comment.
    Normal,
    /// Character is within a string
    LitString,
    /// The previous character was a backslash inside a string; the next character is consumed
    /// as part of the string whatever it is.
    LitStringEscape,
    /// Character is within a raw string
    LitRawString(u32),
    /// Inside the `r#..."` opener of a raw string; the integer is the number of `#` seen so far.
    RawStringPrefix(u32),
    /// Inside the `"#...` closer of a raw string; the integer is the number of `#` still to be
    /// consumed.
    RawStringSuffix(u32),
    /// Character is within a char literal.
    LitChar,
    /// The previous character was a backslash inside a char literal.
    LitCharEscape,
    /// Character inside a block comment, with the integer indicating the nesting deepness of the
    /// comment
    BlockComment(u32),
    /// Character inside a block-commented string, with the integer indicating the nesting deepness
    /// of the comment
    StringInBlockComment(u32),
    /// Status when the '/' has been consumed, but not yet the '*', deepness is
    /// the new deepness (after the comment opening).
    BlockCommentOpening(u32),
    /// Status when the '*' has been consumed, but not yet the '/', deepness is
    /// the new deepness (after the comment closing).
    BlockCommentClosing(u32),
    /// Character is within a line comment
    LineComment,
}
1241
/// Distinguish between functional part of code and comments
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum CodeCharKind {
    Normal,
    Comment,
}

/// Distinguish between functional part of code and comments,
/// describing opening and closing of comments for ease when chunking
/// code from tagged characters
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum FullCodeCharKind {
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
    /// Start of a multiline string inside a comment
    StartStringCommented,
    /// End of a multiline string inside a comment
    EndStringCommented,
    /// Inside a commented string
    InStringCommented,
    /// Start of a multiline string
    StartString,
    /// End of a multiline string
    EndString,
    /// Inside a string.
    InString,
}

impl FullCodeCharKind {
    /// Returns `true` for every kind that belongs to a comment, its first and last characters
    /// included.
    pub(crate) fn is_comment(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::StartComment
                | FullCodeCharKind::InComment
                | FullCodeCharKind::EndComment
                | FullCodeCharKind::StartStringCommented
                | FullCodeCharKind::InStringCommented
                | FullCodeCharKind::EndStringCommented
        )
    }

    /// Returns `true` if the character is inside a comment, which excludes the comment's first
    /// and last characters.
    pub(crate) fn inside_comment(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::InComment
                | FullCodeCharKind::StartStringCommented
                | FullCodeCharKind::InStringCommented
                | FullCodeCharKind::EndStringCommented
        )
    }

    /// Returns `true` for `InString` and `StartString`.
    pub(crate) fn is_string(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::InString | FullCodeCharKind::StartString
        )
    }

    /// Returns `true` if the character is within a commented string (`InStringCommented` or
    /// `StartStringCommented`).
    pub(crate) fn is_commented_string(self) -> bool {
        matches!(
            self,
            FullCodeCharKind::InStringCommented | FullCodeCharKind::StartStringCommented
        )
    }

    /// Collapses the detailed kind into the two-valued `CodeCharKind`.
    fn to_codecharkind(self) -> CodeCharKind {
        match self.is_comment() {
            true => CodeCharKind::Comment,
            false => CodeCharKind::Normal,
        }
    }
}
1318
1319 impl<T> CharClasses<T>
1320 where
1321 T: Iterator,
1322 T::Item: RichChar,
1323 {
new(base: T) -> CharClasses<T>1324 pub(crate) fn new(base: T) -> CharClasses<T> {
1325 CharClasses {
1326 base: multipeek(base),
1327 status: CharClassesStatus::Normal,
1328 }
1329 }
1330 }
1331
is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool where T: Iterator, T::Item: RichChar,1332 fn is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool
1333 where
1334 T: Iterator,
1335 T::Item: RichChar,
1336 {
1337 for _ in 0..count {
1338 match iter.peek() {
1339 Some(c) if c.get_char() == '#' => continue,
1340 _ => return false,
1341 }
1342 }
1343 true
1344 }
1345
/// Classifies the underlying stream one character at a time.
///
/// Each call consumes one item, decides its `FullCodeCharKind` from the current status and the
/// character itself (peeking ahead where needed), and stores the status for the next call.
impl<T> Iterator for CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    type Item = (FullCodeCharKind, T::Item);

    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = self.base.next()?;
        let chr = item.get_char();
        // Kind reported for `item` unless an arm overrides it or returns early.
        let mut char_kind = FullCodeCharKind::Normal;
        // Every arm evaluates to the status for the next character. Arms that `return` early
        // assign `self.status` themselves.
        self.status = match self.status {
            CharClassesStatus::LitRawString(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => {
                        if sharps == 0 {
                            // Closing quote of `r"..."`: reported as `Normal`.
                            char_kind = FullCodeCharKind::Normal;
                            CharClassesStatus::Normal
                        } else if is_raw_string_suffix(&mut self.base, sharps) {
                            CharClassesStatus::RawStringSuffix(sharps)
                        } else {
                            // A quote not followed by enough `#` is part of the string's content.
                            CharClassesStatus::LitRawString(sharps)
                        }
                    }
                    _ => CharClassesStatus::LitRawString(sharps),
                }
            }
            CharClassesStatus::RawStringPrefix(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '#' => CharClassesStatus::RawStringPrefix(sharps + 1),
                    '"' => CharClassesStatus::LitRawString(sharps),
                    _ => CharClassesStatus::Normal, // Unreachable.
                }
            }
            CharClassesStatus::RawStringSuffix(sharps) => {
                match chr {
                    '#' => {
                        if sharps == 1 {
                            // Last `#` of the closer: reported as `Normal`.
                            CharClassesStatus::Normal
                        } else {
                            char_kind = FullCodeCharKind::InString;
                            CharClassesStatus::RawStringSuffix(sharps - 1)
                        }
                    }
                    _ => CharClassesStatus::Normal, // Unreachable
                }
            }
            CharClassesStatus::LitString => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => CharClassesStatus::Normal,
                    '\\' => CharClassesStatus::LitStringEscape,
                    _ => CharClassesStatus::LitString,
                }
            }
            CharClassesStatus::LitStringEscape => {
                // The escaped character never ends the string, whatever it is.
                char_kind = FullCodeCharKind::InString;
                CharClassesStatus::LitString
            }
            CharClassesStatus::LitChar => match chr {
                '\\' => CharClassesStatus::LitCharEscape,
                '\'' => CharClassesStatus::Normal,
                _ => CharClassesStatus::LitChar,
            },
            CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
            CharClassesStatus::Normal => match chr {
                // An `r` directly followed by `#` or `"` is taken as the start of a raw string.
                'r' => match self.base.peek().map(RichChar::get_char) {
                    Some('#') | Some('"') => {
                        char_kind = FullCodeCharKind::InString;
                        CharClassesStatus::RawStringPrefix(0)
                    }
                    _ => CharClassesStatus::Normal,
                },
                '"' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
                '\'' => {
                    // HACK: Work around mut borrow.
                    // A quote followed by a backslash starts an escaped char literal.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\\' => {
                            self.status = CharClassesStatus::LitChar;
                            return Some((char_kind, item));
                        }
                        _ => (),
                    }

                    // NOTE(review): presumably this second `peek` looks one item further than
                    // the one above (itertools' `MultiPeek` advances its peek cursor on every
                    // call), so the quote counts as a char literal only when the character
                    // after next is a closing quote. Confirm against the itertools docs.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\'' => CharClassesStatus::LitChar,
                        _ => CharClassesStatus::Normal,
                    }
                }
                '/' => match self.base.peek() {
                    Some(next) if next.get_char() == '*' => {
                        self.status = CharClassesStatus::BlockCommentOpening(1);
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    Some(next) if next.get_char() == '/' => {
                        self.status = CharClassesStatus::LineComment;
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    _ => CharClassesStatus::Normal,
                },
                _ => CharClassesStatus::Normal,
            },
            CharClassesStatus::StringInBlockComment(deepness) => {
                char_kind = FullCodeCharKind::InStringCommented;
                if chr == '"' {
                    CharClassesStatus::BlockComment(deepness)
                } else if chr == '*' && self.base.peek().map(RichChar::get_char) == Some('/') {
                    // A `*/` closes the comment even from within a commented string.
                    char_kind = FullCodeCharKind::InComment;
                    CharClassesStatus::BlockCommentClosing(deepness - 1)
                } else {
                    CharClassesStatus::StringInBlockComment(deepness)
                }
            }
            CharClassesStatus::BlockComment(deepness) => {
                assert_ne!(deepness, 0);
                char_kind = FullCodeCharKind::InComment;
                match self.base.peek() {
                    // `*/`: one nesting level is closed.
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
                    }
                    // `/*`: a nested comment is opened.
                    Some(next) if next.get_char() == '*' && chr == '/' => {
                        CharClassesStatus::BlockCommentOpening(deepness + 1)
                    }
                    _ if chr == '"' => CharClassesStatus::StringInBlockComment(deepness),
                    _ => self.status,
                }
            }
            CharClassesStatus::BlockCommentOpening(deepness) => {
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentClosing(deepness) => {
                assert_eq!(chr, '/');
                if deepness == 0 {
                    // The outermost comment is closed: this `/` is its last character.
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                } else {
                    self.status = CharClassesStatus::BlockComment(deepness);
                    return Some((FullCodeCharKind::InComment, item));
                }
            }
            CharClassesStatus::LineComment => match chr {
                '\n' => {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                }
                _ => {
                    self.status = CharClassesStatus::LineComment;
                    return Some((FullCodeCharKind::InComment, item));
                }
            },
        };
        Some((char_kind, item))
    }
}
1507
1508 /// An iterator over the lines of a string, paired with the char kind at the
1509 /// end of the line.
1510 pub(crate) struct LineClasses<'a> {
1511 base: iter::Peekable<CharClasses<std::str::Chars<'a>>>,
1512 kind: FullCodeCharKind,
1513 }
1514
1515 impl<'a> LineClasses<'a> {
new(s: &'a str) -> Self1516 pub(crate) fn new(s: &'a str) -> Self {
1517 LineClasses {
1518 base: CharClasses::new(s.chars()).peekable(),
1519 kind: FullCodeCharKind::Normal,
1520 }
1521 }
1522 }
1523
1524 impl<'a> Iterator for LineClasses<'a> {
1525 type Item = (FullCodeCharKind, String);
1526
next(&mut self) -> Option<Self::Item>1527 fn next(&mut self) -> Option<Self::Item> {
1528 self.base.peek()?;
1529
1530 let mut line = String::new();
1531
1532 let start_kind = match self.base.peek() {
1533 Some((kind, _)) => *kind,
1534 None => unreachable!(),
1535 };
1536
1537 for (kind, c) in self.base.by_ref() {
1538 // needed to set the kind of the ending character on the last line
1539 self.kind = kind;
1540 if c == '\n' {
1541 self.kind = match (start_kind, kind) {
1542 (FullCodeCharKind::Normal, FullCodeCharKind::InString) => {
1543 FullCodeCharKind::StartString
1544 }
1545 (FullCodeCharKind::InString, FullCodeCharKind::Normal) => {
1546 FullCodeCharKind::EndString
1547 }
1548 (FullCodeCharKind::InComment, FullCodeCharKind::InStringCommented) => {
1549 FullCodeCharKind::StartStringCommented
1550 }
1551 (FullCodeCharKind::InStringCommented, FullCodeCharKind::InComment) => {
1552 FullCodeCharKind::EndStringCommented
1553 }
1554 _ => kind,
1555 };
1556 break;
1557 }
1558 line.push(c);
1559 }
1560
1561 // Workaround for CRLF newline.
1562 if line.ends_with('\r') {
1563 line.pop();
1564 }
1565
1566 Some((self.kind, line))
1567 }
1568 }
1569
1570 /// Iterator over functional and commented parts of a string. Any part of a string is either
1571 /// functional code, either *one* block comment, either *one* line comment. Whitespace between
1572 /// comments is functional code. Line comments contain their ending newlines.
1573 struct UngroupedCommentCodeSlices<'a> {
1574 slice: &'a str,
1575 iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
1576 }
1577
1578 impl<'a> UngroupedCommentCodeSlices<'a> {
new(code: &'a str) -> UngroupedCommentCodeSlices<'a>1579 fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
1580 UngroupedCommentCodeSlices {
1581 slice: code,
1582 iter: CharClasses::new(code.char_indices()).peekable(),
1583 }
1584 }
1585 }
1586
1587 impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
1588 type Item = (CodeCharKind, usize, &'a str);
1589
next(&mut self) -> Option<Self::Item>1590 fn next(&mut self) -> Option<Self::Item> {
1591 let (kind, (start_idx, _)) = self.iter.next()?;
1592 match kind {
1593 FullCodeCharKind::Normal | FullCodeCharKind::InString => {
1594 // Consume all the Normal code
1595 while let Some(&(char_kind, _)) = self.iter.peek() {
1596 if char_kind.is_comment() {
1597 break;
1598 }
1599 let _ = self.iter.next();
1600 }
1601 }
1602 FullCodeCharKind::StartComment => {
1603 // Consume the whole comment
1604 loop {
1605 match self.iter.next() {
1606 Some((kind, ..)) if kind.inside_comment() => continue,
1607 _ => break,
1608 }
1609 }
1610 }
1611 _ => panic!(),
1612 }
1613 let slice = match self.iter.peek() {
1614 Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
1615 None => &self.slice[start_idx..],
1616 };
1617 Some((
1618 if kind.is_comment() {
1619 CodeCharKind::Comment
1620 } else {
1621 CodeCharKind::Normal
1622 },
1623 start_idx,
1624 slice,
1625 ))
1626 }
1627 }
1628
1629 /// Iterator over an alternating sequence of functional and commented parts of
1630 /// a string. The first item is always a, possibly zero length, subslice of
1631 /// functional text. Line style comments contain their ending newlines.
1632 pub(crate) struct CommentCodeSlices<'a> {
1633 slice: &'a str,
1634 last_slice_kind: CodeCharKind,
1635 last_slice_end: usize,
1636 }
1637
1638 impl<'a> CommentCodeSlices<'a> {
new(slice: &'a str) -> CommentCodeSlices<'a>1639 pub(crate) fn new(slice: &'a str) -> CommentCodeSlices<'a> {
1640 CommentCodeSlices {
1641 slice,
1642 last_slice_kind: CodeCharKind::Comment,
1643 last_slice_end: 0,
1644 }
1645 }
1646 }
1647
1648 impl<'a> Iterator for CommentCodeSlices<'a> {
1649 type Item = (CodeCharKind, usize, &'a str);
1650
next(&mut self) -> Option<Self::Item>1651 fn next(&mut self) -> Option<Self::Item> {
1652 if self.last_slice_end == self.slice.len() {
1653 return None;
1654 }
1655
1656 let mut sub_slice_end = self.last_slice_end;
1657 let mut first_whitespace = None;
1658 let subslice = &self.slice[self.last_slice_end..];
1659 let mut iter = CharClasses::new(subslice.char_indices());
1660
1661 for (kind, (i, c)) in &mut iter {
1662 let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
1663 && &subslice[..2] == "//"
1664 && [' ', '\t'].contains(&c);
1665
1666 if is_comment_connector && first_whitespace.is_none() {
1667 first_whitespace = Some(i);
1668 }
1669
1670 if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
1671 let last_index = match first_whitespace {
1672 Some(j) => j,
1673 None => i,
1674 };
1675 sub_slice_end = self.last_slice_end + last_index;
1676 break;
1677 }
1678
1679 if !is_comment_connector {
1680 first_whitespace = None;
1681 }
1682 }
1683
1684 if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
1685 // This was the last subslice.
1686 sub_slice_end = match first_whitespace {
1687 Some(i) => self.last_slice_end + i,
1688 None => self.slice.len(),
1689 };
1690 }
1691
1692 let kind = match self.last_slice_kind {
1693 CodeCharKind::Comment => CodeCharKind::Normal,
1694 CodeCharKind::Normal => CodeCharKind::Comment,
1695 };
1696 let res = (
1697 kind,
1698 self.last_slice_end,
1699 &self.slice[self.last_slice_end..sub_slice_end],
1700 );
1701 self.last_slice_end = sub_slice_end;
1702 self.last_slice_kind = kind;
1703
1704 Some(res)
1705 }
1706 }
1707
1708 /// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
1709 /// (if it fits in the width/offset, else return `None`), else return `new`
recover_comment_removed( new: String, span: Span, context: &RewriteContext<'_>, ) -> Option<String>1710 pub(crate) fn recover_comment_removed(
1711 new: String,
1712 span: Span,
1713 context: &RewriteContext<'_>,
1714 ) -> Option<String> {
1715 let snippet = context.snippet(span);
1716 if snippet != new && changed_comment_content(snippet, &new) {
1717 // We missed some comments. Warn and keep the original text.
1718 if context.config.error_on_unformatted() {
1719 context.report.append(
1720 context.parse_sess.span_to_filename(span),
1721 vec![FormattingError::from_span(
1722 span,
1723 context.parse_sess,
1724 ErrorKind::LostComment,
1725 )],
1726 );
1727 }
1728 Some(snippet.to_owned())
1729 } else {
1730 Some(new)
1731 }
1732 }
1733
filter_normal_code(code: &str) -> String1734 pub(crate) fn filter_normal_code(code: &str) -> String {
1735 let mut buffer = String::with_capacity(code.len());
1736 LineClasses::new(code).for_each(|(kind, line)| match kind {
1737 FullCodeCharKind::Normal
1738 | FullCodeCharKind::StartString
1739 | FullCodeCharKind::InString
1740 | FullCodeCharKind::EndString => {
1741 buffer.push_str(&line);
1742 buffer.push('\n');
1743 }
1744 _ => (),
1745 });
1746 if !code.ends_with('\n') && buffer.ends_with('\n') {
1747 buffer.pop();
1748 }
1749 buffer
1750 }
1751
1752 /// Returns `true` if the two strings of code have the same payload of comments.
1753 /// The payload of comments is everything in the string except:
1754 /// - actual code (not comments),
1755 /// - comment start/end marks,
1756 /// - whitespace,
1757 /// - '*' at the beginning of lines in block comments.
changed_comment_content(orig: &str, new: &str) -> bool1758 fn changed_comment_content(orig: &str, new: &str) -> bool {
1759 // Cannot write this as a fn since we cannot return types containing closures.
1760 let code_comment_content = |code| {
1761 let slices = UngroupedCommentCodeSlices::new(code);
1762 slices
1763 .filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
1764 .flat_map(|(_, _, s)| CommentReducer::new(s))
1765 };
1766 let res = code_comment_content(orig).ne(code_comment_content(new));
1767 debug!(
1768 "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
1769 res,
1770 orig,
1771 new,
1772 code_comment_content(orig).collect::<String>(),
1773 code_comment_content(new).collect::<String>()
1774 );
1775 res
1776 }
1777
1778 /// Iterator over the 'payload' characters of a comment.
1779 /// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
1780 /// The comment must be one comment, ie not more than one start mark (no multiple line comments,
1781 /// for example).
1782 struct CommentReducer<'a> {
1783 is_block: bool,
1784 at_start_line: bool,
1785 iter: std::str::Chars<'a>,
1786 }
1787
1788 impl<'a> CommentReducer<'a> {
new(comment: &'a str) -> CommentReducer<'a>1789 fn new(comment: &'a str) -> CommentReducer<'a> {
1790 let is_block = comment.starts_with("/*");
1791 let comment = remove_comment_header(comment);
1792 CommentReducer {
1793 is_block,
1794 // There are no supplementary '*' on the first line.
1795 at_start_line: false,
1796 iter: comment.chars(),
1797 }
1798 }
1799 }
1800
1801 impl<'a> Iterator for CommentReducer<'a> {
1802 type Item = char;
1803
next(&mut self) -> Option<Self::Item>1804 fn next(&mut self) -> Option<Self::Item> {
1805 loop {
1806 let mut c = self.iter.next()?;
1807 if self.is_block && self.at_start_line {
1808 while c.is_whitespace() {
1809 c = self.iter.next()?;
1810 }
1811 // Ignore leading '*'.
1812 if c == '*' {
1813 c = self.iter.next()?;
1814 }
1815 } else if c == '\n' {
1816 self.at_start_line = true;
1817 }
1818 if !c.is_whitespace() {
1819 return Some(c);
1820 }
1821 }
1822 }
1823 }
1824
/// Strips the opener from a comment and, for block comments, the two-character closer too.
///
/// Line comments lose `///`, `//!` or `//`. Block comments lose `/**`, `/*!` or `/*` at the
/// front and their last two characters at the back. `/**/` is treated as an empty plain block
/// comment.
///
/// # Panics
///
/// Panics if `comment` does not start with a comment opener.
fn remove_comment_header(comment: &str) -> &str {
    if comment.starts_with("///") || comment.starts_with("//!") {
        return &comment[3..];
    }
    if let Some(rest) = comment.strip_prefix("//") {
        return rest;
    }

    let is_block_doc = (comment.starts_with("/**") && !comment.starts_with("/**/"))
        || comment.starts_with("/*!");
    let opener_len = if is_block_doc {
        3
    } else {
        assert!(
            comment.starts_with("/*"),
            "string '{}' is not a comment",
            comment
        );
        2
    };
    &comment[opener_len..comment.len() - 2]
}
1843
1844 #[cfg(test)]
1845 mod test {
1846 use super::*;
1847 use crate::shape::{Indent, Shape};
1848
1849 #[test]
char_classes()1850 fn char_classes() {
1851 let mut iter = CharClasses::new("//\n\n".chars());
1852
1853 assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
1854 assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
1855 assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
1856 assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
1857 assert_eq!(None, iter.next());
1858 }
1859
1860 #[test]
comment_code_slices()1861 fn comment_code_slices() {
1862 let input = "code(); /* test */ 1 + 1";
1863 let mut iter = CommentCodeSlices::new(input);
1864
1865 assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap());
1866 assert_eq!(
1867 (CodeCharKind::Comment, 8, "/* test */"),
1868 iter.next().unwrap()
1869 );
1870 assert_eq!((CodeCharKind::Normal, 18, " 1 + 1"), iter.next().unwrap());
1871 assert_eq!(None, iter.next());
1872 }
1873
1874 #[test]
comment_code_slices_two()1875 fn comment_code_slices_two() {
1876 let input = "// comment\n test();";
1877 let mut iter = CommentCodeSlices::new(input);
1878
1879 assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap());
1880 assert_eq!(
1881 (CodeCharKind::Comment, 0, "// comment\n"),
1882 iter.next().unwrap()
1883 );
1884 assert_eq!(
1885 (CodeCharKind::Normal, 11, " test();"),
1886 iter.next().unwrap()
1887 );
1888 assert_eq!(None, iter.next());
1889 }
1890
1891 #[test]
comment_code_slices_three()1892 fn comment_code_slices_three() {
1893 let input = "1 // comment\n // comment2\n\n";
1894 let mut iter = CommentCodeSlices::new(input);
1895
1896 assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
1897 assert_eq!(
1898 (CodeCharKind::Comment, 2, "// comment\n // comment2\n"),
1899 iter.next().unwrap()
1900 );
1901 assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
1902 assert_eq!(None, iter.next());
1903 }
1904
// Runs `rewrite_comment` over line, doc, custom and block comments and compares the
// result with a literal expectation. Two configurations are used: one with both
// `wrap_comments` and `normalize_comments` enabled, and one with `wrap_comments` only.
//
// NOTE(review): whitespace inside the string literals below is significant to the
// assertions (the function is marked `#[rustfmt::skip]`, presumably to keep the
// hand-laid-out literals untouched) — confirm the literals' spacing before editing.
#[test]
#[rustfmt::skip]
fn format_doc_comments() {
    // Config with comment wrapping and comment normalization both switched on.
    let mut wrap_normalize_config: crate::config::Config = Default::default();
    wrap_normalize_config.set().wrap_comments(true);
    wrap_normalize_config.set().normalize_comments(true);

    // Config with comment wrapping only.
    let mut wrap_config: crate::config::Config = Default::default();
    wrap_config.set().wrap_comments(true);

    // With the second argument set to `true`, a line comment comes back in block form.
    let comment = rewrite_comment(" //test",
                                  true,
                                  Shape::legacy(100, Indent::new(0, 100)),
                                  &wrap_normalize_config).unwrap();
    assert_eq!("/* test */", comment);

    // A width of 10 forces the line comment to be split in two.
    let comment = rewrite_comment("// comment on a",
                                  false,
                                  Shape::legacy(10, Indent::empty()),
                                  &wrap_normalize_config).unwrap();
    assert_eq!("// comment\n// on a", comment);

    // NOTE(review): the shape is built with `Indent::new(0, 12)`; presumably the second
    // line is expected to be re-indented accordingly — confirm the leading whitespace in
    // both literals.
    let comment = rewrite_comment("// A multi line comment\n // between args.",
                                  false,
                                  Shape::legacy(60, Indent::new(0, 12)),
                                  &wrap_normalize_config).unwrap();
    assert_eq!("// A multi line comment\n // between args.", comment);

    // The shape width (9) is smaller than the expected output; the comment is still
    // expected on a single line.
    let input = "// comment";
    let expected =
        "/* comment */";
    let comment = rewrite_comment(input,
                                  true,
                                  Shape::legacy(9, Indent::new(0, 69)),
                                  &wrap_normalize_config).unwrap();
    assert_eq!(expected, comment);

    // NOTE(review): the variable text suggests this case is about trimming padding inside
    // a block comment — confirm the input literal's spacing.
    let comment = rewrite_comment("/* trimmed */",
                                  true,
                                  Shape::legacy(100, Indent::new(0, 100)),
                                  &wrap_normalize_config).unwrap();
    assert_eq!("/* trimmed */", comment);

    // Check that different comment styles are properly recognised: the doc comments keep
    // their `///` opener while the block comment is expected back as a `//` comment.
    let comment = rewrite_comment(r#"/// test1
/// test2
/*
* test3
*/"#,
                                  false,
                                  Shape::legacy(100, Indent::new(0, 0)),
                                  &wrap_normalize_config).unwrap();
    assert_eq!("/// test1\n/// test2\n// test3", comment);

    // Check that the blank line marks the end of a commented paragraph.
    let comment = rewrite_comment(r#"// test1

// test2"#,
                                  false,
                                  Shape::legacy(100, Indent::new(0, 0)),
                                  &wrap_normalize_config).unwrap();
    assert_eq!("// test1\n\n// test2", comment);

    // Check that the blank line marks the end of a custom-commented paragraph.
    let comment = rewrite_comment(r#"//@ test1

//@ test2"#,
                                  false,
                                  Shape::legacy(100, Indent::new(0, 0)),
                                  &wrap_normalize_config).unwrap();
    assert_eq!("//@ test1\n\n//@ test2", comment);

    // Check that bare lines are just indented but otherwise left unchanged.
    // This case uses the wrap-only config (no normalization).
    // NOTE(review): the relative indentation of the bare lines is what is being tested —
    // verify the leading spaces in the input and expected literals.
    let comment = rewrite_comment(r#"// test1
/*
a bare line!

another bare line!
*/"#,
                                  false,
                                  Shape::legacy(100, Indent::new(0, 0)),
                                  &wrap_config).unwrap();
    assert_eq!("// test1\n/*\n a bare line!\n\n another bare line!\n*/", comment);
}
1989
1990 // This is probably intended to be a non-test fn, but it is not used.
1991 // We should keep this around unless it helps us test stuff to remove it.
uncommented(text: &str) -> String1992 fn uncommented(text: &str) -> String {
1993 CharClasses::new(text.chars())
1994 .filter_map(|(s, c)| match s {
1995 FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
1996 _ => None,
1997 })
1998 .collect()
1999 }
2000
#[test]
fn test_uncommented() {
    // (input, input with every comment removed)
    let cases = [
        ("abc/*...*/", "abc"),
        // Line comment, nested block comments and a block comment containing `//`.
        ("// .... /* \n../* /* *** / */ */a/* // */c\n", "..ac\n"),
        // Comment delimiters inside a string literal are ordinary code.
        ("abc \" /* */\" qsdf", "abc \" /* */\" qsdf"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(uncommented(input), expected);
    }
}
2010
#[test]
fn test_contains_comment() {
    // (code, whether it is expected to contain a comment)
    let cases = [
        ("abc", false),
        ("abc // qsdf", true),
        // An unterminated block comment still counts as a comment.
        ("abc /* kqsdf", true),
        // Comment delimiters inside a string literal do not.
        ("abc \" /* */\" qsdf", false),
    ];

    for &(code, expected) in cases.iter() {
        assert_eq!(contains_comment(code), expected);
    }
}
2018
#[test]
fn test_find_uncommented() {
    // (haystack, needle, expected result of `haystack.find_uncommented(needle)`)
    //
    // Matches inside comments are skipped, matches inside string literals are not, and the
    // reported position is a byte offset (see the `π` case, where `π` takes two bytes).
    let cases: &[(&str, &str, Option<usize>)] = &[
        ("/*/ */test", "test", Some(6)),
        ("//test\ntest", "test", Some(7)),
        ("/* comment only */", "whatever", None),
        (
            "/* comment */ some text /* more commentary */ result",
            "result",
            Some(46),
        ),
        ("sup // sup", "p", Some(2)),
        ("sup", "x", None),
        (r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9)),
        ("/*sup yo? \n sup*/ sup", "p", Some(20)),
        // A needle may not be stitched together across a comment.
        ("hel/*lohello*/lo", "hello", None),
        ("acb", "ab", None),
        (",/*A*/ ", ",", Some(0)),
        ("abc", "abc", Some(0)),
        ("/* abc */", "abc", None),
        ("/**/abc/* */", "abc", Some(4)),
        // Comment delimiters inside a (possibly unterminated) string do not hide the needle.
        ("\"/* abc */\"", "abc", Some(4)),
        ("\"/* abc", "abc", Some(4)),
    ];

    for &(haystack, needle, expected) in cases {
        assert_eq!(expected, haystack.find_uncommented(needle));
    }
}
2046
#[test]
fn test_filter_normal_code() {
    // Code without any comment must come back unchanged.
    let plain = r#"
fn main() {
println!("hello, world");
}
"#;
    let filtered_plain = filter_normal_code(plain);
    assert_eq!(plain, filtered_plain);

    // The same code with an extra comment-only line: the whole line is expected to
    // disappear, leaving exactly the comment-free version.
    let commented = r#"
fn main() {
// hello, world
println!("hello, world");
}
"#;
    let filtered_commented = filter_normal_code(commented);
    assert_eq!(plain, filtered_commented);
}
2063
#[test]
fn test_itemized_block_first_line_handling() {
    // Parses `test_input` as the first line of an itemized block and checks the single
    // stored line, the indent, the opener and the prefix used for continuation lines.
    fn run_test(
        test_input: &str,
        expected_line: &str,
        expected_indent: usize,
        expected_opener: &str,
        expected_line_start: &str,
    ) {
        let block = ItemizedBlock::new(test_input)
            .unwrap_or_else(|| panic!("not recognized as a list item: {:?}", test_input));
        assert_eq!(1, block.lines.len(), "test_input: {:?}", test_input);
        assert_eq!(
            expected_line, &block.lines[0],
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_indent, block.indent,
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_opener, &block.opener,
            "test_input: {:?}",
            test_input
        );
        assert_eq!(
            expected_line_start, &block.line_start,
            "test_input: {:?}",
            test_input
        );
    }

    // Runs of spaces are spelled out by length instead of being embedded in literals: the
    // exact count is what is being asserted, and it is impossible to read (and easy to
    // corrupt) when written as a bare whitespace-only string.
    fn spaces(n: usize) -> String {
        " ".repeat(n)
    }

    // Bullet items: continuation lines are prefixed with as many spaces as the indent.
    run_test("- foo", "foo", 2, "- ", &spaces(2));
    run_test("* foo", "foo", 2, "* ", &spaces(2));
    // Blockquotes: continuation lines repeat the quote marker instead.
    run_test("> foo", "foo", 2, "> ", "> ");

    // Ordered items with `.` and `)` delimiters, one and two digits.
    run_test("1. foo", "foo", 3, "1. ", &spaces(3));
    run_test("12. foo", "foo", 4, "12. ", &spaces(4));
    run_test("1) foo", "foo", 3, "1) ", &spaces(3));
    run_test("12) foo", "foo", 4, "12) ", &spaces(4));

    // Leading indentation (4 spaces) is part of the opener and counts towards the indent.
    let indented_opener = format!("{}- ", spaces(4));
    run_test(
        &format!("{}foo", indented_opener),
        "foo",
        6,
        &indented_opener,
        &spaces(6),
    );

    // https://spec.commonmark.org/0.30 says: "A start number may begin with 0s":
    run_test("0. foo", "foo", 3, "0. ", &spaces(3));
    run_test("01. foo", "foo", 4, "01. ", &spaces(4));
}
2112
#[test]
fn test_itemized_block_nonobvious_markers_are_rejected() {
    // Every line below must NOT be recognized as the start of a list item.
    let rejected_lines: &[&str] = &[
        // Non-numeric item markers (e.g. `a.` or `iv.`) are not allowed by
        // https://spec.commonmark.org/0.30/#ordered-list-marker. We also note that allowing
        // them would risk misidentifying regular words as item markers. See also the
        // discussion in https://talk.commonmark.org/t/blank-lines-before-lists-revisited/1990
        "word. rest of the paragraph.",
        "a. maybe this is a list item? maybe not?",
        "iv. maybe this is a list item? maybe not?",
        // Numbers with 3 or more digits are not recognized as item markers, to avoid
        // formatting the following example as a list:
        //
        // ```
        // The Captain died in
        // 1868. He was buried in...
        // ```
        "123. only 2-digit numbers are recognized as item markers.",
        // Parens:
        "123) giving some coverage to parens as well.",
        "a) giving some coverage to parens as well.",
        // https://spec.commonmark.org/0.30 says that "at least one space or tab is needed
        // between the list marker and any following content":
        "1.Not a list item.",
        "1.2.3. Not a list item.",
        "1)Not a list item.",
        "-Not a list item.",
        "+Not a list item.",
        "+1 not a list item.",
        // https://spec.commonmark.org/0.30 says: "A start number may not be negative":
        "-1. Not a list item.",
        "-1 Not a list item.",
    ];

    for &line in rejected_lines {
        assert!(
            ItemizedBlock::new(line).is_none(),
            "The following line shouldn't be classified as a list item: {}",
            line
        );
    }
}
2155 }
2156