• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Building blocks for advanced wrapping functionality.
2 //!
3 //! The functions and structs in this module can be used to implement
//! advanced wrapping functionality when the [`wrap`](super::wrap) and
//! [`fill`](super::fill) functions don't do what you want.
6 //!
7 //! In general, you want to follow these steps when wrapping
8 //! something:
9 //!
10 //! 1. Split your input into [`Fragment`]s. These are abstract blocks
11 //!    of text or content which can be wrapped into lines. You can use
12 //!    [`find_words`] to do this for text.
13 //!
14 //! 2. Potentially split your fragments into smaller pieces. This
15 //!    allows you to implement things like hyphenation. If wrapping
16 //!    text, [`split_words`] can help you do this.
17 //!
18 //! 3. Potentially break apart fragments that are still too large to
19 //!    fit on a single line. This is implemented in [`break_words`].
20 //!
21 //! 4. Finally take your fragments and put them into lines. There are
22 //!    two algorithms for this: [`wrap_optimal_fit`] and
23 //!    [`wrap_first_fit`]. The former produces better line breaks, the
24 //!    latter is faster.
25 //!
26 //! 5. Iterate through the slices returned by the wrapping functions
27 //!    and construct your lines of output.
28 //!
29 //! Please [open an issue](https://github.com/mgeisler/textwrap/) if
30 //! the functionality here is not sufficient or if you have ideas for
31 //! improving it. We would love to hear from you!
32 
33 use crate::{Options, WordSplitter};
34 
35 #[cfg(feature = "smawk")]
36 mod optimal_fit;
37 #[cfg(feature = "smawk")]
38 pub use optimal_fit::wrap_optimal_fit;
39 
/// The CSI or “Control Sequence Introducer” introduces an ANSI escape
/// sequence: an escape character (U+001B) followed by `[`. Such
/// sequences are typically used for colored text and are ignored
/// when computing the text width.
const CSI: (char, char) = ('\u{1b}', '[');
/// An ANSI escape sequence ends at the first character found in this
/// range (the “final byte”, 0x40–0x7E).
const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '@'..='~';

/// Skip over an ANSI escape sequence, if `ch` starts one.
///
/// The `ch` is the current `char` and `chars` yields the characters
/// following it. Returns `true` when an escape sequence was consumed
/// from `chars`, up to and including its final byte.
///
/// Returns `false` otherwise. Note that `chars` can still have been
/// advanced in that case: when `ch` is the escape character, the
/// next character is consumed even if it turns out not to be `[`,
/// and an unterminated sequence drains `chars` completely.
#[inline]
fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool {
    let (escape, bracket) = CSI;
    // The short-circuit matters: `chars` is only touched when `ch`
    // really is the escape character.
    if ch != escape || chars.next() != Some(bracket) {
        return false;
    }
    // Consume parameter and intermediate characters until the final
    // byte shows up. `any` stops right after the first match.
    chars.any(|c| ANSI_FINAL_BYTE.contains(&c))
}
64 
/// Displayed width of `ch` in columns, as reported by the
/// `unicode-width` crate. Characters for which the crate reports no
/// width are counted as zero columns wide.
#[cfg(feature = "unicode-width")]
#[inline]
fn ch_width(ch: char) -> usize {
    use unicode_width::UnicodeWidthChar;

    ch.width().unwrap_or(0)
}
70 
/// First character which [`ch_width`] will classify as double-width.
/// Please see [`display_width`].
#[cfg(not(feature = "unicode-width"))]
const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}';

/// Crude approximation of the displayed width of `ch`: characters
/// from [`DOUBLE_WIDTH_CUTOFF`] and up count as two columns, all
/// other characters count as one column.
#[cfg(not(feature = "unicode-width"))]
#[inline]
fn ch_width(ch: char) -> usize {
    if ch >= DOUBLE_WIDTH_CUTOFF {
        2
    } else {
        1
    }
}
85 
86 /// Compute the display width of `text` while skipping over ANSI
87 /// escape sequences.
88 ///
89 /// # Examples
90 ///
91 /// ```
92 /// use textwrap::core::display_width;
93 ///
94 /// assert_eq!(display_width("Café Plain"), 10);
95 /// assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
96 /// ```
97 ///
98 /// **Note:** When the `unicode-width` Cargo feature is disabled, the
99 /// width of a `char` is determined by a crude approximation which
100 /// simply counts chars below U+1100 as 1 column wide, and all other
101 /// characters as 2 columns wide. With the feature enabled, function
102 /// will correctly deal with [combining characters] in their
103 /// decomposed form (see [Unicode equivalence]).
104 ///
105 /// An example of a decomposed character is “é”, which can be
106 /// decomposed into: “e” followed by a combining acute accent: “◌́”.
107 /// Without the `unicode-width` Cargo feature, every `char` below
108 /// U+1100 has a width of 1. This includes the combining accent:
109 ///
110 /// ```
111 /// use textwrap::core::display_width;
112 ///
113 /// assert_eq!(display_width("Cafe Plain"), 10);
114 /// #[cfg(feature = "unicode-width")]
115 /// assert_eq!(display_width("Cafe\u{301} Plain"), 10);
116 /// #[cfg(not(feature = "unicode-width"))]
117 /// assert_eq!(display_width("Cafe\u{301} Plain"), 11);
118 /// ```
119 ///
120 /// ## Emojis and CJK Characters
121 ///
/// Characters such as emojis and [CJK characters] used in the
/// Chinese, Japanese, and Korean languages are seen as double-width,
/// even if the `unicode-width` feature is disabled:
125 ///
126 /// ```
127 /// use textwrap::core::display_width;
128 ///
/// assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
130 /// assert_eq!(display_width("你好"), 4);  // “Nǐ hǎo” or “Hello” in Chinese
131 /// ```
132 ///
133 /// # Limitations
134 ///
135 /// The displayed width of a string cannot always be computed from the
136 /// string alone. This is because the width depends on the rendering
137 /// engine used. This is particularly visible with [emoji modifier
138 /// sequences] where a base emoji is modified with, e.g., skin tone or
139 /// hair color modifiers. It is up to the rendering engine to detect
140 /// this and to produce a suitable emoji.
141 ///
142 /// A simple example is “❤️”, which consists of “❤” (U+2764: Black
143 /// Heart Symbol) followed by U+FE0F (Variation Selector-16). By
144 /// itself, “❤” is a black heart, but if you follow it with the
145 /// variant selector, you may get a wider red heart.
146 ///
/// A more complex example would be “👨‍🦰” which should depict a man
148 /// with red hair. Here the computed width is too large — and the
149 /// width differs depending on the use of the `unicode-width` feature:
150 ///
151 /// ```
152 /// use textwrap::core::display_width;
153 ///
/// assert_eq!("👨‍🦰".chars().collect::<Vec<char>>(), ['\u{1f468}', '\u{200d}', '\u{1f9b0}']);
/// #[cfg(feature = "unicode-width")]
/// assert_eq!(display_width("👨‍🦰"), 4);
/// #[cfg(not(feature = "unicode-width"))]
/// assert_eq!(display_width("👨‍🦰"), 6);
159 /// ```
160 ///
/// This happens because the grapheme consists of three code points:
/// “👨” (U+1F468: Man), Zero Width Joiner (U+200D), and “🦰”
/// (U+1F9B0: Red Hair). You can see them above in the test. With
/// `unicode-width` enabled, the ZWJ is correctly seen as having zero
/// width; without it, the ZWJ is counted as a double-width character.
166 ///
167 /// ## Terminal Support
168 ///
169 /// Modern browsers typically do a great job at combining characters
170 /// as shown above, but terminals often struggle more. As an example,
171 /// Gnome Terminal version 3.38.1, shows “❤️” as a big red heart, but
/// shows "👨‍🦰" as “👨🦰”.
173 ///
174 /// [combining characters]: https://en.wikipedia.org/wiki/Combining_character
175 /// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence
176 /// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters
177 /// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html
178 #[inline]
display_width(text: &str) -> usize179 pub fn display_width(text: &str) -> usize {
180     let mut chars = text.chars();
181     let mut width = 0;
182     while let Some(ch) = chars.next() {
183         if skip_ansi_escape_sequence(ch, &mut chars) {
184             continue;
185         }
186         width += ch_width(ch);
187     }
188     width
189 }
190 
/// The unit of content which the wrapping algorithms put into lines.
///
/// A fragment models an abstract _word_ together with the
/// _whitespace_ that follows it. When the word ends up last on a
/// line, the whitespace is dropped and a so-called _penalty_ is
/// inserted in its place (typically `"-"` if the word was
/// hyphenated).
///
/// The wrapping code never looks at the actual content of the word,
/// the whitespace, or the penalty. Only the displayed width of each
/// of the three parts matters, and that is what this trait exposes.
pub trait Fragment: std::fmt::Debug {
    /// Displayed width of the word itself.
    fn width(&self) -> usize;

    /// Displayed width of the whitespace which follows the word
    /// whenever the word is not the last one on its line.
    fn whitespace_width(&self) -> usize;

    /// Displayed width of the penalty which is inserted when the
    /// word is the last one on its line.
    fn penalty_width(&self) -> usize;
}
213 
/// A piece of wrappable text together with its trailing whitespace.
///
/// `Word` is the [`Fragment`] used for text: it has a width, a
/// stretch of trailing whitespace, and possibly a penalty item. It
/// dereferences to the text of the word itself, without whitespace
/// and penalty.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Word<'a> {
    /// Text of the word, excluding trailing whitespace.
    word: &'a str,
    /// Displayed width of `word`, stored when the `Word` is built.
    width: usize,
    /// Whitespace which follows the word.
    pub(crate) whitespace: &'a str,
    /// Text inserted if the word ends up last on a line.
    pub(crate) penalty: &'a str,
}

impl<'a> std::ops::Deref for Word<'a> {
    type Target = str;

    /// Gives access to the text of the word.
    fn deref(&self) -> &str {
        self.word
    }
}
233 
234 impl<'a> Word<'a> {
235     /// Construct a new `Word`.
236     ///
237     /// A trailing stretch of `' '` is automatically taken to be the
238     /// whitespace part of the word.
from(word: &str) -> Word<'_>239     pub fn from(word: &str) -> Word<'_> {
240         let trimmed = word.trim_end_matches(' ');
241         Word {
242             word: trimmed,
243             width: display_width(&trimmed),
244             whitespace: &word[trimmed.len()..],
245             penalty: "",
246         }
247     }
248 
249     /// Break this word into smaller words with a width of at most
250     /// `line_width`. The whitespace and penalty from this `Word` is
251     /// added to the last piece.
252     ///
253     /// # Examples
254     ///
255     /// ```
256     /// use textwrap::core::Word;
257     /// assert_eq!(
258     ///     Word::from("Hello!  ").break_apart(3).collect::<Vec<_>>(),
259     ///     vec![Word::from("Hel"), Word::from("lo!  ")]
260     /// );
261     /// ```
break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b262     pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b {
263         let mut char_indices = self.word.char_indices();
264         let mut offset = 0;
265         let mut width = 0;
266 
267         std::iter::from_fn(move || {
268             while let Some((idx, ch)) = char_indices.next() {
269                 if skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) {
270                     continue;
271                 }
272 
273                 if width > 0 && width + ch_width(ch) > line_width {
274                     let word = Word {
275                         word: &self.word[offset..idx],
276                         width: width,
277                         whitespace: "",
278                         penalty: "",
279                     };
280                     offset = idx;
281                     width = ch_width(ch);
282                     return Some(word);
283                 }
284 
285                 width += ch_width(ch);
286             }
287 
288             if offset < self.word.len() {
289                 let word = Word {
290                     word: &self.word[offset..],
291                     width: width,
292                     whitespace: self.whitespace,
293                     penalty: self.penalty,
294                 };
295                 offset = self.word.len();
296                 return Some(word);
297             }
298 
299             None
300         })
301     }
302 }
303 
304 impl Fragment for Word<'_> {
305     #[inline]
width(&self) -> usize306     fn width(&self) -> usize {
307         self.width
308     }
309 
310     // We assume the whitespace consist of ' ' only. This allows us to
311     // compute the display width in constant time.
312     #[inline]
whitespace_width(&self) -> usize313     fn whitespace_width(&self) -> usize {
314         self.whitespace.len()
315     }
316 
317     // We assume the penalty is `""` or `"-"`. This allows us to
318     // compute the display width in constant time.
319     #[inline]
penalty_width(&self) -> usize320     fn penalty_width(&self) -> usize {
321         self.penalty.len()
322     }
323 }
324 
325 /// Split line into words separated by regions of `' '` characters.
326 ///
327 /// # Examples
328 ///
329 /// ```
330 /// use textwrap::core::{find_words, Fragment, Word};
331 /// let words = find_words("Hello World!").collect::<Vec<_>>();
332 /// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);
333 /// assert_eq!(words[0].width(), 5);
334 /// assert_eq!(words[0].whitespace_width(), 1);
335 /// assert_eq!(words[0].penalty_width(), 0);
336 /// ```
find_words(line: &str) -> impl Iterator<Item = Word>337 pub fn find_words(line: &str) -> impl Iterator<Item = Word> {
338     let mut start = 0;
339     let mut in_whitespace = false;
340     let mut char_indices = line.char_indices();
341 
342     std::iter::from_fn(move || {
343         // for (idx, ch) in char_indices does not work, gives this
344         // error:
345         //
346         // > cannot move out of `char_indices`, a captured variable in
347         // > an `FnMut` closure
348         #[allow(clippy::while_let_on_iterator)]
349         while let Some((idx, ch)) = char_indices.next() {
350             if in_whitespace && ch != ' ' {
351                 let word = Word::from(&line[start..idx]);
352                 start = idx;
353                 in_whitespace = ch == ' ';
354                 return Some(word);
355             }
356 
357             in_whitespace = ch == ' ';
358         }
359 
360         if start < line.len() {
361             let word = Word::from(&line[start..]);
362             start = line.len();
363             return Some(word);
364         }
365 
366         None
367     })
368 }
369 
370 /// Split words into smaller words according to the split points given
371 /// by `options`.
372 ///
373 /// Note that we split all words, regardless of their length. This is
374 /// to more cleanly separate the business of splitting (including
375 /// automatic hyphenation) from the business of word wrapping.
376 ///
377 /// # Examples
378 ///
379 /// ```
380 /// use textwrap::core::{split_words, Word};
381 /// use textwrap::{NoHyphenation, Options};
382 ///
383 /// // The default splitter is HyphenSplitter:
384 /// let options = Options::new(80);
385 /// assert_eq!(
386 ///     split_words(vec![Word::from("foo-bar")], &options).collect::<Vec<_>>(),
387 ///     vec![Word::from("foo-"), Word::from("bar")]
388 /// );
389 ///
390 /// // The NoHyphenation splitter ignores the '-':
391 /// let options = Options::new(80).splitter(NoHyphenation);
392 /// assert_eq!(
393 ///     split_words(vec![Word::from("foo-bar")], &options).collect::<Vec<_>>(),
394 ///     vec![Word::from("foo-bar")]
395 /// );
396 /// ```
split_words<'a, I, S, Opt>(words: I, options: Opt) -> impl Iterator<Item = Word<'a>> where I: IntoIterator<Item = Word<'a>>, S: WordSplitter, Opt: Into<Options<'a, S>>,397 pub fn split_words<'a, I, S, Opt>(words: I, options: Opt) -> impl Iterator<Item = Word<'a>>
398 where
399     I: IntoIterator<Item = Word<'a>>,
400     S: WordSplitter,
401     Opt: Into<Options<'a, S>>,
402 {
403     let options = options.into();
404 
405     words.into_iter().flat_map(move |word| {
406         let mut prev = 0;
407         let mut split_points = options.splitter.split_points(&word).into_iter();
408         std::iter::from_fn(move || {
409             if let Some(idx) = split_points.next() {
410                 let need_hyphen = !word[..idx].ends_with('-');
411                 let w = Word {
412                     word: &word.word[prev..idx],
413                     width: display_width(&word[prev..idx]),
414                     whitespace: "",
415                     penalty: if need_hyphen { "-" } else { "" },
416                 };
417                 prev = idx;
418                 return Some(w);
419             }
420 
421             if prev < word.word.len() || prev == 0 {
422                 let w = Word {
423                     word: &word.word[prev..],
424                     width: display_width(&word[prev..]),
425                     whitespace: word.whitespace,
426                     penalty: word.penalty,
427                 };
428                 prev = word.word.len() + 1;
429                 return Some(w);
430             }
431 
432             None
433         })
434     })
435 }
436 
437 /// Forcibly break words wider than `line_width` into smaller words.
438 ///
439 /// This simply calls [`Word::break_apart`] on words that are too
440 /// wide. This means that no extra `'-'` is inserted, the word is
441 /// simply broken into smaller pieces.
break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>> where I: IntoIterator<Item = Word<'a>>,442 pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>>
443 where
444     I: IntoIterator<Item = Word<'a>>,
445 {
446     let mut shortened_words = Vec::new();
447     for word in words {
448         if word.width() > line_width {
449             shortened_words.extend(word.break_apart(line_width));
450         } else {
451             shortened_words.push(word);
452         }
453     }
454     shortened_words
455 }
456 
/// Wrapping algorithms.
///
/// After a text has been broken into [`Fragment`]s, one has to
/// decide how to arrange the fragments into lines. The simplest
/// algorithm for this is implemented by [`wrap_first_fit`]: it uses
/// no look-ahead and simply keeps adding fragments to the line for
/// as long as they fit. This can lead to poor line breaks when a
/// large fragment almost-but-not-quite fits on a line: the fragment
/// is moved to the next line and leaves a large gap behind. The
/// more advanced algorithm implemented by [`wrap_optimal_fit`]
/// takes this into account. It considers all possible line breaks
/// and attempts to minimize the gaps left behind by overly short
/// lines.
///
/// While both algorithms run in linear time, the first-fit algorithm
/// is about 4 times faster than the optimal-fit algorithm.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum WrapAlgorithm {
    /// Find optimal line breaks with an advanced algorithm which
    /// considers the entire paragraph. Implemented by
    /// [`wrap_optimal_fit`].
    ///
    /// **Note:** Only available when the `smawk` Cargo feature is
    /// enabled.
    #[cfg(feature = "smawk")]
    OptimalFit,
    /// Find line breaks with a fast and simple algorithm which uses
    /// no look-ahead. Implemented by [`wrap_first_fit`].
    FirstFit,
}
487 
488 /// Wrap abstract fragments into lines with a first-fit algorithm.
489 ///
490 /// The `line_widths` map line numbers (starting from 0) to a target
491 /// line width. This can be used to implement hanging indentation.
492 ///
493 /// The fragments must already have been split into the desired
494 /// widths, this function will not (and cannot) attempt to split them
495 /// further when arranging them into lines.
496 ///
497 /// # First-Fit Algorithm
498 ///
499 /// This implements a simple “greedy” algorithm: accumulate fragments
500 /// one by one and when a fragment no longer fits, start a new line.
501 /// There is no look-ahead, we simply take first fit of the fragments
502 /// we find.
503 ///
504 /// While fast and predictable, this algorithm can produce poor line
505 /// breaks when a long fragment is moved to a new line, leaving behind
506 /// a large gap:
507 ///
508 /// ```
509 /// use textwrap::core::{find_words, wrap_first_fit, Word};
510 ///
511 /// // Helper to convert wrapped lines to a Vec<String>.
512 /// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
513 ///     lines.iter().map(|line| {
514 ///         line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
515 ///     }).collect::<Vec<_>>()
516 /// }
517 ///
518 /// let text = "These few words will unfortunately not wrap nicely.";
519 /// let words = find_words(text).collect::<Vec<_>>();
520 /// assert_eq!(lines_to_strings(wrap_first_fit(&words, |_| 15)),
521 ///            vec!["These few words",
522 ///                 "will",  // <-- short line
523 ///                 "unfortunately",
524 ///                 "not wrap",
525 ///                 "nicely."]);
526 ///
527 /// // We can avoid the short line if we look ahead:
528 /// #[cfg(feature = "smawk")]
529 /// assert_eq!(lines_to_strings(textwrap::core::wrap_optimal_fit(&words, |_| 15)),
530 ///            vec!["These few",
531 ///                 "words will",
532 ///                 "unfortunately",
533 ///                 "not wrap",
534 ///                 "nicely."]);
535 /// ```
536 ///
537 /// The [`wrap_optimal_fit`] function was used above to get better
538 /// line breaks. It uses an advanced algorithm which tries to avoid
539 /// short lines. This function is about 4 times faster than
540 /// [`wrap_optimal_fit`].
541 ///
542 /// # Examples
543 ///
544 /// Imagine you're building a house site and you have a number of
545 /// tasks you need to execute. Things like pour foundation, complete
546 /// framing, install plumbing, electric cabling, install insulation.
547 ///
548 /// The construction workers can only work during daytime, so they
549 /// need to pack up everything at night. Because they need to secure
550 /// their tools and move machines back to the garage, this process
551 /// takes much more time than the time it would take them to simply
552 /// switch to another task.
553 ///
554 /// You would like to make a list of tasks to execute every day based
555 /// on your estimates. You can model this with a program like this:
556 ///
557 /// ```
558 /// use textwrap::core::{wrap_first_fit, Fragment};
559 ///
560 /// #[derive(Debug)]
561 /// struct Task<'a> {
562 ///     name: &'a str,
563 ///     hours: usize,   // Time needed to complete task.
564 ///     sweep: usize,   // Time needed for a quick sweep after task during the day.
565 ///     cleanup: usize, // Time needed for full cleanup if day ends with this task.
566 /// }
567 ///
568 /// impl Fragment for Task<'_> {
569 ///     fn width(&self) -> usize { self.hours }
570 ///     fn whitespace_width(&self) -> usize { self.sweep }
571 ///     fn penalty_width(&self) -> usize { self.cleanup }
572 /// }
573 ///
574 /// // The morning tasks
575 /// let tasks = vec![
576 ///     Task { name: "Foundation",  hours: 4, sweep: 2, cleanup: 3 },
577 ///     Task { name: "Framing",     hours: 3, sweep: 1, cleanup: 2 },
578 ///     Task { name: "Plumbing",    hours: 2, sweep: 2, cleanup: 2 },
579 ///     Task { name: "Electrical",  hours: 2, sweep: 1, cleanup: 2 },
580 ///     Task { name: "Insulation",  hours: 2, sweep: 1, cleanup: 2 },
581 ///     Task { name: "Drywall",     hours: 3, sweep: 1, cleanup: 2 },
582 ///     Task { name: "Floors",      hours: 3, sweep: 1, cleanup: 2 },
583 ///     Task { name: "Countertops", hours: 1, sweep: 1, cleanup: 2 },
584 ///     Task { name: "Bathrooms",   hours: 2, sweep: 1, cleanup: 2 },
585 /// ];
586 ///
587 /// // Fill tasks into days, taking `day_length` into account. The
588 /// // output shows the hours worked per day along with the names of
589 /// // the tasks for that day.
590 /// fn assign_days<'a>(tasks: &[Task<'a>], day_length: usize) -> Vec<(usize, Vec<&'a str>)> {
591 ///     let mut days = Vec::new();
592 ///     // Assign tasks to days. The assignment is a vector of slices,
593 ///     // with a slice per day.
594 ///     let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, |i| day_length);
595 ///     for day in assigned_days.iter() {
596 ///         let last = day.last().unwrap();
597 ///         let work_hours: usize = day.iter().map(|t| t.hours + t.sweep).sum();
598 ///         let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
599 ///         days.push((work_hours - last.sweep + last.cleanup, names));
600 ///     }
601 ///     days
602 /// }
603 ///
604 /// // With a single crew working 8 hours a day:
605 /// assert_eq!(
606 ///     assign_days(&tasks, 8),
607 ///     [
608 ///         (7, vec!["Foundation"]),
609 ///         (8, vec!["Framing", "Plumbing"]),
610 ///         (7, vec!["Electrical", "Insulation"]),
611 ///         (5, vec!["Drywall"]),
612 ///         (7, vec!["Floors", "Countertops"]),
613 ///         (4, vec!["Bathrooms"]),
614 ///     ]
615 /// );
616 ///
617 /// // With two crews working in shifts, 16 hours a day:
618 /// assert_eq!(
619 ///     assign_days(&tasks, 16),
620 ///     [
621 ///         (14, vec!["Foundation", "Framing", "Plumbing"]),
622 ///         (15, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
623 ///         (6, vec!["Countertops", "Bathrooms"]),
624 ///     ]
625 /// );
626 /// ```
627 ///
628 /// Apologies to anyone who actually knows how to build a house and
629 /// knows how long each step takes :-)
wrap_first_fit<T: Fragment, F: Fn(usize) -> usize>( fragments: &[T], line_widths: F, ) -> Vec<&[T]>630 pub fn wrap_first_fit<T: Fragment, F: Fn(usize) -> usize>(
631     fragments: &[T],
632     line_widths: F,
633 ) -> Vec<&[T]> {
634     let mut lines = Vec::new();
635     let mut start = 0;
636     let mut width = 0;
637 
638     for (idx, fragment) in fragments.iter().enumerate() {
639         let line_width = line_widths(lines.len());
640         if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
641             lines.push(&fragments[start..idx]);
642             start = idx;
643             width = 0;
644         }
645         width += fragment.width() + fragment.whitespace_width();
646     }
647     lines.push(&fragments[start..]);
648     lines
649 }
650 
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(feature = "unicode-width")]
    use unicode_width::UnicodeWidthChar;

    // Like assert_eq!, but the left expression is an iterator.
    macro_rules! assert_iter_eq {
        ($left:expr, $right:expr) => {
            assert_eq!($left.collect::<Vec<_>>(), $right);
        };
    }

    #[test]
    fn skip_ansi_escape_sequence_works() {
        let blue_text = "\u{1b}[34mHello\u{1b}[0m";
        let mut chars = blue_text.chars();
        let ch = chars.next().unwrap();
        assert!(skip_ansi_escape_sequence(ch, &mut chars));
        // The whole "\u{1b}[34m" prefix must have been consumed.
        assert_eq!(chars.next(), Some('H'));
    }

    #[test]
    fn emojis_have_correct_width() {
        use unic_emoji_char::is_emoji;

        // Emojis in the Basic Latin (ASCII) and Latin-1 Supplement
        // blocks all have a width of 1 column. This includes
        // characters such as '#' and '©'.
        for ch in '\u{1}'..'\u{FF}' {
            if is_emoji(ch) {
                let desc = format!("{:?} U+{:04X}", ch, ch as u32);

                #[cfg(feature = "unicode-width")]
                assert_eq!(ch.width().unwrap(), 1, "char: {}", desc);

                #[cfg(not(feature = "unicode-width"))]
                assert_eq!(ch_width(ch), 1, "char: {}", desc);
            }
        }

        // Emojis in the remaining blocks of the Basic Multilingual
        // Plane (BMP), in the Supplementary Multilingual Plane (SMP),
        // and in the Supplementary Ideographic Plane (SIP), are all 1
        // or 2 columns wide when unicode-width is used, and always 2
        // columns wide otherwise. This includes all of our favorite
        // emojis such as 😊.
        for ch in '\u{FF}'..'\u{2FFFF}' {
            if is_emoji(ch) {
                let desc = format!("{:?} U+{:04X}", ch, ch as u32);

                #[cfg(feature = "unicode-width")]
                assert!(ch.width().unwrap() <= 2, "char: {}", desc);

                #[cfg(not(feature = "unicode-width"))]
                assert_eq!(ch_width(ch), 2, "char: {}", desc);
            }
        }

        // The remaining planes contain almost no assigned code points
        // and thus also no emojis.
    }

    #[test]
    fn display_width_works() {
        assert_eq!("Café Plain".len(), 11); // “é” is two bytes
        assert_eq!(display_width("Café Plain"), 10);
        assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10);
    }

    #[test]
    fn display_width_narrow_emojis() {
        #[cfg(feature = "unicode-width")]
        assert_eq!(display_width("⁉"), 1);

        // The ⁉ character is above DOUBLE_WIDTH_CUTOFF.
        #[cfg(not(feature = "unicode-width"))]
        assert_eq!(display_width("⁉"), 2);
    }

    #[test]
    fn display_width_narrow_emojis_variant_selector() {
        #[cfg(feature = "unicode-width")]
        assert_eq!(display_width("⁉\u{fe0f}"), 1);

        // The variant selector-16 is also counted.
        #[cfg(not(feature = "unicode-width"))]
        assert_eq!(display_width("⁉\u{fe0f}"), 4);
    }

    #[test]
    fn display_width_emojis() {
        // Ten emojis, each two columns wide.
        assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20);
    }

    #[test]
    fn find_words_empty() {
        assert_iter_eq!(find_words(""), vec![]);
    }

    #[test]
    fn find_words_single_word() {
        assert_iter_eq!(find_words("foo"), vec![Word::from("foo")]);
    }

    #[test]
    fn find_words_two_words() {
        assert_iter_eq!(
            find_words("foo bar"),
            vec![Word::from("foo "), Word::from("bar")]
        );
    }

    #[test]
    fn find_words_multiple_words() {
        assert_iter_eq!(
            find_words("foo bar baz"),
            vec![Word::from("foo "), Word::from("bar "), Word::from("baz")]
        );
    }

    #[test]
    fn find_words_whitespace() {
        assert_iter_eq!(find_words("    "), vec![Word::from("    ")]);
    }

    #[test]
    fn find_words_inter_word_whitespace() {
        assert_iter_eq!(
            find_words("foo   bar"),
            vec![Word::from("foo   "), Word::from("bar")]
        )
    }

    #[test]
    fn find_words_trailing_whitespace() {
        assert_iter_eq!(find_words("foo   "), vec![Word::from("foo   ")]);
    }

    #[test]
    fn find_words_leading_whitespace() {
        assert_iter_eq!(
            find_words("   foo"),
            vec![Word::from("   "), Word::from("foo")]
        );
    }

    #[test]
    fn find_words_multi_column_char() {
        assert_iter_eq!(
            find_words("\u{1f920}"), // cowboy emoji 🤠
            vec![Word::from("\u{1f920}")]
        );
    }

    #[test]
    fn find_words_hyphens() {
        assert_iter_eq!(find_words("foo-bar"), vec![Word::from("foo-bar")]);
        assert_iter_eq!(
            find_words("foo- bar"),
            vec![Word::from("foo- "), Word::from("bar")]
        );
        assert_iter_eq!(
            find_words("foo - bar"),
            vec![Word::from("foo "), Word::from("- "), Word::from("bar")]
        );
        assert_iter_eq!(
            find_words("foo -bar"),
            vec![Word::from("foo "), Word::from("-bar")]
        );
    }

    #[test]
    fn split_words_no_words() {
        assert_iter_eq!(split_words(vec![], 80), vec![]);
    }

    #[test]
    fn split_words_empty_word() {
        assert_iter_eq!(
            split_words(vec![Word::from("   ")], 80),
            vec![Word::from("   ")]
        );
    }

    #[test]
    fn split_words_hyphen_splitter() {
        assert_iter_eq!(
            split_words(vec![Word::from("foo-bar")], 80),
            vec![Word::from("foo-"), Word::from("bar")]
        );
    }

    #[test]
    fn split_words_short_line() {
        // Note that `split_words` does not take the line width into
        // account, that is the job of `break_words`.
        assert_iter_eq!(
            split_words(vec![Word::from("foobar")], 3),
            vec![Word::from("foobar")]
        );
    }

    #[test]
    fn split_words_adds_penalty() {
        // Splitter which always splits after the third byte,
        // regardless of the word.
        #[derive(Debug)]
        struct FixedSplitPoint;
        impl WordSplitter for FixedSplitPoint {
            fn split_points(&self, _: &str) -> Vec<usize> {
                vec![3]
            }
        }

        let options = Options::new(80).splitter(FixedSplitPoint);
        assert_iter_eq!(
            split_words(vec![Word::from("foobar")].into_iter(), &options),
            vec![
                Word {
                    word: "foo",
                    width: 3,
                    whitespace: "",
                    penalty: "-"
                },
                Word {
                    word: "bar",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                }
            ]
        );

        // No penalty is added when the split point already follows
        // a hyphen.
        assert_iter_eq!(
            split_words(vec![Word::from("fo-bar")].into_iter(), &options),
            vec![
                Word {
                    word: "fo-",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                },
                Word {
                    word: "bar",
                    width: 3,
                    whitespace: "",
                    penalty: ""
                }
            ]
        );
    }
}
903