1 use anyhow::{anyhow, bail};
2 use std::{
3 borrow::Cow,
4 io::{BufRead, Lines},
5 iter::Peekable,
6 };
7
/// Line that both opens and closes an AsciiDoc listing block.
const LISTING_DELIMITER: &'static str = "----";
/// Prefix of a line holding a block image macro.
const IMAGE_BLOCK_PREFIX: &'static str = "image::";
/// Prefix of a line holding a block video macro.
const VIDEO_BLOCK_PREFIX: &'static str = "video::";
11
/// State of a line-by-line AsciiDoc to Markdown conversion.
struct Converter<'a, 'b, R>
where
    R: BufRead,
{
    /// Peekable stream of input lines.
    iter: &'a mut Peekable<Lines<R>>,
    /// Buffer the generated Markdown is appended to.
    output: &'b mut String,
}
16
17 impl<'a, 'b, R: BufRead> Converter<'a, 'b, R> {
new(iter: &'a mut Peekable<Lines<R>>, output: &'b mut String) -> Self18 fn new(iter: &'a mut Peekable<Lines<R>>, output: &'b mut String) -> Self {
19 Self { iter, output }
20 }
21
process(&mut self) -> anyhow::Result<()>22 fn process(&mut self) -> anyhow::Result<()> {
23 self.process_document_header()?;
24 self.skip_blank_lines()?;
25 self.output.push('\n');
26
27 loop {
28 let line = self.iter.peek().unwrap().as_deref().map_err(|e| anyhow!("{e}"))?;
29 if get_title(line).is_some() {
30 let line = self.iter.next().unwrap().unwrap();
31 let (level, title) = get_title(&line).unwrap();
32 self.write_title(level, title);
33 } else if get_list_item(line).is_some() {
34 self.process_list()?;
35 } else if line.starts_with('[') {
36 self.process_source_code_block(0)?;
37 } else if line.starts_with(LISTING_DELIMITER) {
38 self.process_listing_block(None, 0)?;
39 } else if line.starts_with('.') {
40 self.process_block_with_title(0)?;
41 } else if line.starts_with(IMAGE_BLOCK_PREFIX) {
42 self.process_image_block(None, 0)?;
43 } else if line.starts_with(VIDEO_BLOCK_PREFIX) {
44 self.process_video_block(None, 0)?;
45 } else {
46 self.process_paragraph(0, |line| line.is_empty())?;
47 }
48
49 self.skip_blank_lines()?;
50 if self.iter.peek().is_none() {
51 break;
52 }
53 self.output.push('\n');
54 }
55 Ok(())
56 }
57
process_document_header(&mut self) -> anyhow::Result<()>58 fn process_document_header(&mut self) -> anyhow::Result<()> {
59 self.process_document_title()?;
60
61 while let Some(line) = self.iter.next() {
62 let line = line?;
63 if line.is_empty() {
64 break;
65 }
66 if !line.starts_with(':') {
67 self.write_line(&line, 0)
68 }
69 }
70
71 Ok(())
72 }
73
process_document_title(&mut self) -> anyhow::Result<()>74 fn process_document_title(&mut self) -> anyhow::Result<()> {
75 if let Some(Ok(line)) = self.iter.next() {
76 if let Some((level, title)) = get_title(&line) {
77 let title = process_inline_macros(title)?;
78 if level == 1 {
79 self.write_title(level, &title);
80 return Ok(());
81 }
82 }
83 }
84 bail!("document title not found")
85 }
86
process_list(&mut self) -> anyhow::Result<()>87 fn process_list(&mut self) -> anyhow::Result<()> {
88 let mut nesting = ListNesting::new();
89 while let Some(line) = self.iter.peek() {
90 let line = line.as_deref().map_err(|e| anyhow!("{e}"))?;
91
92 if get_list_item(line).is_some() {
93 let line = self.iter.next().unwrap()?;
94 let line = process_inline_macros(&line)?;
95 let (marker, item) = get_list_item(&line).unwrap();
96 nesting.set_current(marker);
97 self.write_list_item(item, &nesting);
98 self.process_paragraph(nesting.indent(), |line| {
99 line.is_empty() || get_list_item(line).is_some() || line == "+"
100 })?;
101 } else if line == "+" {
102 let _ = self.iter.next().unwrap()?;
103 let line = self
104 .iter
105 .peek()
106 .ok_or_else(|| anyhow!("list continuation unexpectedly terminated"))?;
107 let line = line.as_deref().map_err(|e| anyhow!("{e}"))?;
108
109 let indent = nesting.indent();
110 if line.starts_with('[') {
111 self.write_line("", 0);
112 self.process_source_code_block(indent)?;
113 } else if line.starts_with(LISTING_DELIMITER) {
114 self.write_line("", 0);
115 self.process_listing_block(None, indent)?;
116 } else if line.starts_with('.') {
117 self.write_line("", 0);
118 self.process_block_with_title(indent)?;
119 } else if line.starts_with(IMAGE_BLOCK_PREFIX) {
120 self.write_line("", 0);
121 self.process_image_block(None, indent)?;
122 } else if line.starts_with(VIDEO_BLOCK_PREFIX) {
123 self.write_line("", 0);
124 self.process_video_block(None, indent)?;
125 } else {
126 self.write_line("", 0);
127 let current = nesting.current().unwrap();
128 self.process_paragraph(indent, |line| {
129 line.is_empty()
130 || get_list_item(line).filter(|(m, _)| m == current).is_some()
131 || line == "+"
132 })?;
133 }
134 } else {
135 break;
136 }
137 self.skip_blank_lines()?;
138 }
139
140 Ok(())
141 }
142
process_source_code_block(&mut self, level: usize) -> anyhow::Result<()>143 fn process_source_code_block(&mut self, level: usize) -> anyhow::Result<()> {
144 if let Some(Ok(line)) = self.iter.next() {
145 if let Some(styles) = line.strip_prefix("[source").and_then(|s| s.strip_suffix(']')) {
146 let mut styles = styles.split(',');
147 if !styles.next().unwrap().is_empty() {
148 bail!("not a source code block");
149 }
150 let language = styles.next();
151 return self.process_listing_block(language, level);
152 }
153 }
154 bail!("not a source code block")
155 }
156
process_listing_block(&mut self, style: Option<&str>, level: usize) -> anyhow::Result<()>157 fn process_listing_block(&mut self, style: Option<&str>, level: usize) -> anyhow::Result<()> {
158 if let Some(Ok(line)) = self.iter.next() {
159 if line == LISTING_DELIMITER {
160 self.write_indent(level);
161 self.output.push_str("```");
162 if let Some(style) = style {
163 self.output.push_str(style);
164 }
165 self.output.push('\n');
166 while let Some(line) = self.iter.next() {
167 let line = line?;
168 if line == LISTING_DELIMITER {
169 self.write_line("```", level);
170 return Ok(());
171 } else {
172 self.write_line(&line, level);
173 }
174 }
175 bail!("listing block is not terminated")
176 }
177 }
178 bail!("not a listing block")
179 }
180
process_block_with_title(&mut self, level: usize) -> anyhow::Result<()>181 fn process_block_with_title(&mut self, level: usize) -> anyhow::Result<()> {
182 if let Some(Ok(line)) = self.iter.next() {
183 let title =
184 line.strip_prefix('.').ok_or_else(|| anyhow!("extraction of the title failed"))?;
185
186 let line = self
187 .iter
188 .peek()
189 .ok_or_else(|| anyhow!("target block for the title is not found"))?;
190 let line = line.as_deref().map_err(|e| anyhow!("{e}"))?;
191 if line.starts_with(IMAGE_BLOCK_PREFIX) {
192 return self.process_image_block(Some(title), level);
193 } else if line.starts_with(VIDEO_BLOCK_PREFIX) {
194 return self.process_video_block(Some(title), level);
195 } else {
196 bail!("title for that block type is not supported");
197 }
198 }
199 bail!("not a title")
200 }
201
process_image_block(&mut self, caption: Option<&str>, level: usize) -> anyhow::Result<()>202 fn process_image_block(&mut self, caption: Option<&str>, level: usize) -> anyhow::Result<()> {
203 if let Some(Ok(line)) = self.iter.next() {
204 if let Some((url, attrs)) = parse_media_block(&line, IMAGE_BLOCK_PREFIX) {
205 let alt = if let Some(stripped) =
206 attrs.strip_prefix('"').and_then(|s| s.strip_suffix('"'))
207 {
208 stripped
209 } else {
210 attrs
211 };
212 if let Some(caption) = caption {
213 self.write_caption_line(caption, level);
214 }
215 self.write_indent(level);
216 self.output.push_str(";
219 self.output.push_str(url);
220 self.output.push_str(")\n");
221 return Ok(());
222 }
223 }
224 bail!("not a image block")
225 }
226
process_video_block(&mut self, caption: Option<&str>, level: usize) -> anyhow::Result<()>227 fn process_video_block(&mut self, caption: Option<&str>, level: usize) -> anyhow::Result<()> {
228 if let Some(Ok(line)) = self.iter.next() {
229 if let Some((url, attrs)) = parse_media_block(&line, VIDEO_BLOCK_PREFIX) {
230 let html_attrs = match attrs {
231 "options=loop" => "controls loop",
232 r#"options="autoplay,loop""# => "autoplay controls loop",
233 _ => bail!("unsupported video syntax"),
234 };
235 if let Some(caption) = caption {
236 self.write_caption_line(caption, level);
237 }
238 self.write_indent(level);
239 self.output.push_str(r#"<video src=""#);
240 self.output.push_str(url);
241 self.output.push_str(r#"" "#);
242 self.output.push_str(html_attrs);
243 self.output.push_str(">Your browser does not support the video tag.</video>\n");
244 return Ok(());
245 }
246 }
247 bail!("not a video block")
248 }
249
process_paragraph<P>(&mut self, level: usize, predicate: P) -> anyhow::Result<()> where P: Fn(&str) -> bool,250 fn process_paragraph<P>(&mut self, level: usize, predicate: P) -> anyhow::Result<()>
251 where
252 P: Fn(&str) -> bool,
253 {
254 while let Some(line) = self.iter.peek() {
255 let line = line.as_deref().map_err(|e| anyhow!("{e}"))?;
256 if predicate(line) {
257 break;
258 }
259
260 self.write_indent(level);
261 let line = self.iter.next().unwrap()?;
262 let line = line.trim_start();
263 let line = process_inline_macros(line)?;
264 if let Some(stripped) = line.strip_suffix('+') {
265 self.output.push_str(stripped);
266 self.output.push('\\');
267 } else {
268 self.output.push_str(&line);
269 }
270 self.output.push('\n');
271 }
272
273 Ok(())
274 }
275
skip_blank_lines(&mut self) -> anyhow::Result<()>276 fn skip_blank_lines(&mut self) -> anyhow::Result<()> {
277 while let Some(line) = self.iter.peek() {
278 if !line.as_deref().unwrap().is_empty() {
279 break;
280 }
281 self.iter.next().unwrap()?;
282 }
283 Ok(())
284 }
285
write_title(&mut self, indent: usize, title: &str)286 fn write_title(&mut self, indent: usize, title: &str) {
287 for _ in 0..indent {
288 self.output.push('#');
289 }
290 self.output.push(' ');
291 self.output.push_str(title);
292 self.output.push('\n');
293 }
294
write_list_item(&mut self, item: &str, nesting: &ListNesting)295 fn write_list_item(&mut self, item: &str, nesting: &ListNesting) {
296 let (marker, indent) = nesting.marker();
297 self.write_indent(indent);
298 self.output.push_str(marker);
299 self.output.push_str(item);
300 self.output.push('\n');
301 }
302
write_caption_line(&mut self, caption: &str, indent: usize)303 fn write_caption_line(&mut self, caption: &str, indent: usize) {
304 self.write_indent(indent);
305 self.output.push('_');
306 self.output.push_str(caption);
307 self.output.push_str("_\\\n");
308 }
309
write_indent(&mut self, indent: usize)310 fn write_indent(&mut self, indent: usize) {
311 for _ in 0..indent {
312 self.output.push(' ');
313 }
314 }
315
write_line(&mut self, line: &str, indent: usize)316 fn write_line(&mut self, line: &str, indent: usize) {
317 self.write_indent(indent);
318 self.output.push_str(line);
319 self.output.push('\n');
320 }
321 }
322
convert_asciidoc_to_markdown<R>(input: R) -> anyhow::Result<String> where R: BufRead,323 pub(crate) fn convert_asciidoc_to_markdown<R>(input: R) -> anyhow::Result<String>
324 where
325 R: BufRead,
326 {
327 let mut output = String::new();
328 let mut iter = input.lines().peekable();
329
330 let mut converter = Converter::new(&mut iter, &mut output);
331 converter.process()?;
332
333 Ok(output)
334 }
335
get_title(line: &str) -> Option<(usize, &str)>336 fn get_title(line: &str) -> Option<(usize, &str)> {
337 strip_prefix_symbol(line, '=')
338 }
339
get_list_item(line: &str) -> Option<(ListMarker, &str)>340 fn get_list_item(line: &str) -> Option<(ListMarker, &str)> {
341 const HYPHEN_MARKER: &str = "- ";
342 if let Some(text) = line.strip_prefix(HYPHEN_MARKER) {
343 Some((ListMarker::Hyphen, text))
344 } else if let Some((count, text)) = strip_prefix_symbol(line, '*') {
345 Some((ListMarker::Asterisk(count), text))
346 } else if let Some((count, text)) = strip_prefix_symbol(line, '.') {
347 Some((ListMarker::Dot(count), text))
348 } else {
349 None
350 }
351 }
352
/// Strips a prefix made of one or more `symbol` characters followed by a single space.
///
/// Returns how many times `symbol` was repeated and the remainder of the line
/// after the space, or `None` when the line does not start with such a prefix.
fn strip_prefix_symbol(line: &str, symbol: char) -> Option<(usize, &str)> {
    let after_symbols = line.trim_start_matches(symbol);
    // Every stripped character is `symbol`, so the byte difference is a
    // multiple of its encoded length.
    let count = (line.len() - after_symbols.len()) / symbol.len_utf8();
    if count == 0 {
        return None;
    }
    after_symbols.strip_prefix(' ').map(|text| (count, text))
}
372
/// Splits a block media macro line of the form `<prefix><url>[<attrs>]`.
///
/// Returns the URL (everything between the prefix and the first `[`) and the
/// attribute text between that `[` and the final `]`, or `None` when the line
/// does not have this shape.
fn parse_media_block<'a>(line: &'a str, prefix: &str) -> Option<(&'a str, &'a str)> {
    let body = line.strip_prefix(prefix)?;
    let (url, bracketed) = body.split_once('[')?;
    let attrs = bracketed.strip_suffix(']')?;
    Some((url, attrs))
}
383
/// Stack of the list markers that are currently open, outermost first.
#[derive(Debug)]
struct ListNesting(Vec<ListMarker>);

impl ListNesting {
    /// Creates an empty nesting.
    fn new() -> Self {
        Self(Vec::with_capacity(6))
    }

    /// Returns the innermost open marker, if any.
    fn current(&self) -> Option<&ListMarker> {
        self.0.last()
    }

    /// Makes `marker` the innermost level.
    ///
    /// If `marker` is already open, every level nested below it is closed;
    /// otherwise it is opened as a new, deeper level.
    fn set_current(&mut self, marker: ListMarker) {
        match self.0.iter().position(|open| *open == marker) {
            Some(index) => self.0.truncate(index + 1),
            None => self.0.push(marker),
        }
    }

    /// Total width of the Markdown markers of all open levels; this is the
    /// indentation of text that belongs to the innermost item.
    fn indent(&self) -> usize {
        self.0.iter().map(|m| m.in_markdown().len()).sum()
    }

    /// Returns the Markdown marker of the innermost level and the indentation
    /// to put in front of it (the width of all enclosing levels).
    ///
    /// Yields `("", 0)` when no level is open.
    fn marker(&self) -> (&str, usize) {
        // `split_last` avoids computing `len() - 1`, which underflows on an
        // empty nesting.
        match self.0.split_last() {
            None => ("", 0),
            Some((innermost, enclosing)) => {
                let indent = enclosing.iter().map(|m| m.in_markdown().len()).sum();
                (innermost.in_markdown(), indent)
            }
        }
    }
}

/// AsciiDoc list marker; the payload is the number of repeated marker characters.
#[derive(Debug, PartialEq, Eq)]
enum ListMarker {
    Asterisk(usize),
    Hyphen,
    Dot(usize),
}

impl ListMarker {
    /// Markdown marker text (including the trailing space) used for this marker.
    fn in_markdown(&self) -> &'static str {
        match self {
            ListMarker::Asterisk(_) | ListMarker::Hyphen => "- ",
            ListMarker::Dot(_) => "1. ",
        }
    }
}
436
process_inline_macros(line: &str) -> anyhow::Result<Cow<'_, str>>437 fn process_inline_macros(line: &str) -> anyhow::Result<Cow<'_, str>> {
438 let mut chars = line.char_indices();
439 loop {
440 let (start, end, a_macro) = match get_next_line_component(&mut chars) {
441 Component::None => break,
442 Component::Text => continue,
443 Component::Macro(s, e, m) => (s, e, m),
444 };
445 let mut src = line.chars();
446 let mut processed = String::new();
447 for _ in 0..start {
448 processed.push(src.next().unwrap());
449 }
450 processed.push_str(a_macro.process()?.as_str());
451 for _ in start..end {
452 let _ = src.next().unwrap();
453 }
454 let mut pos = end;
455
456 loop {
457 let (start, end, a_macro) = match get_next_line_component(&mut chars) {
458 Component::None => break,
459 Component::Text => continue,
460 Component::Macro(s, e, m) => (s, e, m),
461 };
462 for _ in pos..start {
463 processed.push(src.next().unwrap());
464 }
465 processed.push_str(a_macro.process()?.as_str());
466 for _ in start..end {
467 let _ = src.next().unwrap();
468 }
469 pos = end;
470 }
471 for ch in src {
472 processed.push(ch);
473 }
474 return Ok(Cow::Owned(processed));
475 }
476 Ok(Cow::Borrowed(line))
477 }
478
get_next_line_component(chars: &mut std::str::CharIndices<'_>) -> Component479 fn get_next_line_component(chars: &mut std::str::CharIndices<'_>) -> Component {
480 let (start, mut macro_name) = match chars.next() {
481 None => return Component::None,
482 Some((_, ch)) if ch == ' ' || !ch.is_ascii() => return Component::Text,
483 Some((pos, ch)) => (pos, String::from(ch)),
484 };
485 loop {
486 match chars.next() {
487 None => return Component::None,
488 Some((_, ch)) if ch == ' ' || !ch.is_ascii() => return Component::Text,
489 Some((_, ':')) => break,
490 Some((_, ch)) => macro_name.push(ch),
491 }
492 }
493
494 let mut macro_target = String::new();
495 loop {
496 match chars.next() {
497 None => return Component::None,
498 Some((_, ' ')) => return Component::Text,
499 Some((_, '[')) => break,
500 Some((_, ch)) => macro_target.push(ch),
501 }
502 }
503
504 let mut attr_value = String::new();
505 let end = loop {
506 match chars.next() {
507 None => return Component::None,
508 Some((pos, ']')) => break pos + 1,
509 Some((_, ch)) => attr_value.push(ch),
510 }
511 };
512
513 Component::Macro(start, end, Macro::new(macro_name, macro_target, attr_value))
514 }
515
516 enum Component {
517 None,
518 Text,
519 Macro(usize, usize, Macro),
520 }
521
522 struct Macro {
523 name: String,
524 target: String,
525 attrs: String,
526 }
527
528 impl Macro {
new(name: String, target: String, attrs: String) -> Self529 fn new(name: String, target: String, attrs: String) -> Self {
530 Self { name, target, attrs }
531 }
532
process(&self) -> anyhow::Result<String>533 fn process(&self) -> anyhow::Result<String> {
534 let name = &self.name;
535 let text = match name.as_str() {
536 "https" => {
537 let url = &self.target;
538 let anchor_text = &self.attrs;
539 format!("[{anchor_text}](https:{url})")
540 }
541 "image" => {
542 let url = &self.target;
543 let alt = &self.attrs;
544 format!("")
545 }
546 "kbd" => {
547 let keys = self.attrs.split('+').map(|k| Cow::Owned(format!("<kbd>{k}</kbd>")));
548 keys.collect::<Vec<_>>().join("+")
549 }
550 "pr" => {
551 let pr = &self.target;
552 let url = format!("https://github.com/rust-analyzer/rust-analyzer/pull/{pr}");
553 format!("[`#{pr}`]({url})")
554 }
555 "commit" => {
556 let hash = &self.target;
557 let short = &hash[0..7];
558 let url = format!("https://github.com/rust-analyzer/rust-analyzer/commit/{hash}");
559 format!("[`{short}`]({url})")
560 }
561 "release" => {
562 let date = &self.target;
563 let url = format!("https://github.com/rust-analyzer/rust-analyzer/releases/{date}");
564 format!("[`{date}`]({url})")
565 }
566 _ => bail!("macro not supported: {name}"),
567 };
568 Ok(text)
569 }
570 }
571
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::read_to_string;

    /// Converts the sample document and compares it with the stored expected output.
    #[test]
    fn test_asciidoc_to_markdown_conversion() {
        let source = read_to_string("test_data/input.adoc").unwrap();
        let want = read_to_string("test_data/expected.md").unwrap();
        let got = convert_asciidoc_to_markdown(std::io::Cursor::new(&source)).unwrap();

        assert_eq!(got, want);
    }

    /// Generates one `#[test]` per `(name, input line, expected output)` triple.
    macro_rules! test_inline_macro_processing {
        ($(($test_name:ident, $line:expr, $want:expr),)*) => {
            $(
                #[test]
                fn $test_name() {
                    let line = $line;
                    let got = process_inline_macros(&line).unwrap();
                    let want = $want;
                    assert_eq!(got, want)
                }
            )*
        };
    }

    test_inline_macro_processing! {
        (inline_macro_processing_for_empty_line, "", ""),
        (inline_macro_processing_for_line_with_no_macro, "foo bar", "foo bar"),
        (
            inline_macro_processing_for_macro_in_line_start,
            "kbd::[Ctrl+T] foo",
            "<kbd>Ctrl</kbd>+<kbd>T</kbd> foo"
        ),
        (
            inline_macro_processing_for_macro_in_line_end,
            "foo kbd::[Ctrl+T]",
            "foo <kbd>Ctrl</kbd>+<kbd>T</kbd>"
        ),
        (
            inline_macro_processing_for_macro_in_the_middle_of_line,
            "foo kbd::[Ctrl+T] foo",
            "foo <kbd>Ctrl</kbd>+<kbd>T</kbd> foo"
        ),
        (
            inline_macro_processing_for_several_macros,
            "foo kbd::[Ctrl+T] foo kbd::[Enter] foo",
            "foo <kbd>Ctrl</kbd>+<kbd>T</kbd> foo <kbd>Enter</kbd> foo"
        ),
        (
            inline_macro_processing_for_several_macros_without_text_in_between,
            "foo kbd::[Ctrl+T]kbd::[Enter] foo",
            "foo <kbd>Ctrl</kbd>+<kbd>T</kbd><kbd>Enter</kbd> foo"
        ),
    }
}
632