• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Word wrapping algorithms.
2 //!
3 //! After a text has been broken into words (or [`Fragment`]s), one
4 //! now has to decide how to break the fragments into lines. The
5 //! simplest algorithm for this is implemented by [`wrap_first_fit`]:
6 //! it uses no look-ahead and simply adds fragments to the line as
7 //! long as they fit. However, this can lead to poor line breaks if a
8 //! large fragment almost-but-not-quite fits on a line. When that
9 //! happens, the fragment is moved to the next line and it will leave
10 //! behind a large gap. A more advanced algorithm, implemented by
11 //! [`wrap_optimal_fit`], will take this into account. The optimal-fit
12 //! algorithm considers all possible line breaks and will attempt to
13 //! minimize the gaps left behind by overly short lines.
14 //!
15 //! While both algorithms run in linear time, the first-fit algorithm
16 //! is about 4 times faster than the optimal-fit algorithm.
17 
18 #[cfg(feature = "smawk")]
19 mod optimal_fit;
20 #[cfg(feature = "smawk")]
21 pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
22 
23 use crate::core::{Fragment, Word};
24 
25 /// Describes how to wrap words into lines.
26 ///
27 /// The simplest approach is to wrap words one word at a time and
28 /// accept the first way of wrapping which fit
29 /// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
30 /// enabled, a more complex algorithm is available which will look at
31 /// an entire paragraph at a time in order to find optimal line breaks
32 /// ([`WrapAlgorithm::OptimalFit`]).
33 #[derive(Clone, Copy)]
34 pub enum WrapAlgorithm {
35     /// Wrap words using a fast and simple algorithm.
36     ///
37     /// This algorithm uses no look-ahead when finding line breaks.
38     /// Implemented by [`wrap_first_fit`], please see that function for
39     /// details and examples.
40     FirstFit,
41 
42     /// Wrap words using an advanced algorithm with look-ahead.
43     ///
44     /// This wrapping algorithm considers the entire paragraph to find
45     /// optimal line breaks. When wrapping text, "penalties" are
46     /// assigned to line breaks based on the gaps left at the end of
47     /// lines. See [`Penalties`] for details.
48     ///
49     /// The underlying wrapping algorithm is implemented by
50     /// [`wrap_optimal_fit`], please see that function for examples.
51     ///
52     /// **Note:** Only available when the `smawk` Cargo feature is
53     /// enabled.
54     #[cfg(feature = "smawk")]
55     OptimalFit(Penalties),
56 
57     /// Custom wrapping function.
58     ///
59     /// Use this if you want to implement your own wrapping algorithm.
60     /// The function can freely decide how to turn a slice of
61     /// [`Word`]s into lines.
62     ///
63     /// # Example
64     ///
65     /// ```
66     /// use textwrap::core::Word;
67     /// use textwrap::{wrap, Options, WrapAlgorithm};
68     ///
69     /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
70     ///     let mut lines = Vec::new();
71     ///     let mut step = 1;
72     ///     let mut start_idx = 0;
73     ///     while start_idx + step <= words.len() {
74     ///       lines.push(&words[start_idx .. start_idx+step]);
75     ///       start_idx += step;
76     ///       step += 1;
77     ///     }
78     ///     lines
79     /// }
80     ///
81     /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
82     /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
83     ///            vec!["First,",
84     ///                 "second, third,",
85     ///                 "fourth, fifth, sixth"]);
86     /// ```
87     Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
88 }
89 
90 impl std::fmt::Debug for WrapAlgorithm {
fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result91     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
92         match self {
93             WrapAlgorithm::FirstFit => f.write_str("FirstFit"),
94             #[cfg(feature = "smawk")]
95             WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties),
96             WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"),
97         }
98     }
99 }
100 
101 impl WrapAlgorithm {
102     /// Create new wrap algorithm.
103     ///
104     /// The best wrapping algorithm is used by default, i.e.,
105     /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
106     /// [`WrapAlgorithm::FirstFit`].
new() -> Self107     pub const fn new() -> Self {
108         #[cfg(not(feature = "smawk"))]
109         {
110             WrapAlgorithm::FirstFit
111         }
112 
113         #[cfg(feature = "smawk")]
114         {
115             WrapAlgorithm::new_optimal_fit()
116         }
117     }
118 
119     /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
120     /// works well for monospace text.
121     ///
122     /// **Note:** Only available when the `smawk` Cargo feature is
123     /// enabled.
124     #[cfg(feature = "smawk")]
new_optimal_fit() -> Self125     pub const fn new_optimal_fit() -> Self {
126         WrapAlgorithm::OptimalFit(Penalties::new())
127     }
128 
129     /// Wrap words according to line widths.
130     ///
131     /// The `line_widths` slice gives the target line width for each
132     /// line (the last slice element is repeated as necessary). This
133     /// can be used to implement hanging indentation.
134     #[inline]
wrap<'a, 'b>( &self, words: &'b [Word<'a>], line_widths: &'b [usize], ) -> Vec<&'b [Word<'a>]>135     pub fn wrap<'a, 'b>(
136         &self,
137         words: &'b [Word<'a>],
138         line_widths: &'b [usize],
139     ) -> Vec<&'b [Word<'a>]> {
140         // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
141         // = 9_007_199_254_740_992 can be represented without loss by
142         // a f64. Larger line widths will be rounded to the nearest
143         // representable number.
144         let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
145 
146         match self {
147             WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
148 
149             #[cfg(feature = "smawk")]
150             WrapAlgorithm::OptimalFit(penalties) => {
151                 // The computation cannnot overflow when the line
152                 // widths are restricted to usize.
153                 wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
154             }
155 
156             WrapAlgorithm::Custom(func) => func(words, line_widths),
157         }
158     }
159 }
160 
161 impl Default for WrapAlgorithm {
default() -> Self162     fn default() -> Self {
163         WrapAlgorithm::new()
164     }
165 }
166 
167 /// Wrap abstract fragments into lines with a first-fit algorithm.
168 ///
169 /// The `line_widths` slice gives the target line width for each line
170 /// (the last slice element is repeated as necessary). This can be
171 /// used to implement hanging indentation.
172 ///
173 /// The fragments must already have been split into the desired
174 /// widths, this function will not (and cannot) attempt to split them
175 /// further when arranging them into lines.
176 ///
177 /// # First-Fit Algorithm
178 ///
179 /// This implements a simple “greedy” algorithm: accumulate fragments
180 /// one by one and when a fragment no longer fits, start a new line.
181 /// There is no look-ahead, we simply take first fit of the fragments
182 /// we find.
183 ///
184 /// While fast and predictable, this algorithm can produce poor line
185 /// breaks when a long fragment is moved to a new line, leaving behind
186 /// a large gap:
187 ///
188 /// ```
189 /// use textwrap::core::Word;
190 /// use textwrap::wrap_algorithms::wrap_first_fit;
191 /// use textwrap::WordSeparator;
192 ///
193 /// // Helper to convert wrapped lines to a Vec<String>.
194 /// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
195 ///     lines.iter().map(|line| {
196 ///         line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
197 ///     }).collect::<Vec<_>>()
198 /// }
199 ///
200 /// let text = "These few words will unfortunately not wrap nicely.";
201 /// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
202 /// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
203 ///            vec!["These few words",
204 ///                 "will",  // <-- short line
205 ///                 "unfortunately",
206 ///                 "not wrap",
207 ///                 "nicely."]);
208 ///
209 /// // We can avoid the short line if we look ahead:
210 /// #[cfg(feature = "smawk")]
211 /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
212 /// #[cfg(feature = "smawk")]
213 /// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
214 ///            vec!["These few",
215 ///                 "words will",
216 ///                 "unfortunately",
217 ///                 "not wrap",
218 ///                 "nicely."]);
219 /// ```
220 ///
221 /// The [`wrap_optimal_fit`] function was used above to get better
222 /// line breaks. It uses an advanced algorithm which tries to avoid
223 /// short lines. This function is about 4 times faster than
224 /// [`wrap_optimal_fit`].
225 ///
226 /// # Examples
227 ///
228 /// Imagine you're building a house site and you have a number of
229 /// tasks you need to execute. Things like pour foundation, complete
230 /// framing, install plumbing, electric cabling, install insulation.
231 ///
232 /// The construction workers can only work during daytime, so they
233 /// need to pack up everything at night. Because they need to secure
234 /// their tools and move machines back to the garage, this process
235 /// takes much more time than the time it would take them to simply
236 /// switch to another task.
237 ///
238 /// You would like to make a list of tasks to execute every day based
239 /// on your estimates. You can model this with a program like this:
240 ///
241 /// ```
242 /// use textwrap::core::{Fragment, Word};
243 /// use textwrap::wrap_algorithms::wrap_first_fit;
244 ///
245 /// #[derive(Debug)]
246 /// struct Task<'a> {
247 ///     name: &'a str,
248 ///     hours: f64,   // Time needed to complete task.
249 ///     sweep: f64,   // Time needed for a quick sweep after task during the day.
250 ///     cleanup: f64, // Time needed for full cleanup if day ends with this task.
251 /// }
252 ///
253 /// impl Fragment for Task<'_> {
254 ///     fn width(&self) -> f64 { self.hours }
255 ///     fn whitespace_width(&self) -> f64 { self.sweep }
256 ///     fn penalty_width(&self) -> f64 { self.cleanup }
257 /// }
258 ///
259 /// // The morning tasks
260 /// let tasks = vec![
261 ///     Task { name: "Foundation",  hours: 4.0, sweep: 2.0, cleanup: 3.0 },
262 ///     Task { name: "Framing",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
263 ///     Task { name: "Plumbing",    hours: 2.0, sweep: 2.0, cleanup: 2.0 },
264 ///     Task { name: "Electrical",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
265 ///     Task { name: "Insulation",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
266 ///     Task { name: "Drywall",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
267 ///     Task { name: "Floors",      hours: 3.0, sweep: 1.0, cleanup: 2.0 },
268 ///     Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
269 ///     Task { name: "Bathrooms",   hours: 2.0, sweep: 1.0, cleanup: 2.0 },
270 /// ];
271 ///
272 /// // Fill tasks into days, taking `day_length` into account. The
273 /// // output shows the hours worked per day along with the names of
274 /// // the tasks for that day.
275 /// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
276 ///     let mut days = Vec::new();
277 ///     // Assign tasks to days. The assignment is a vector of slices,
278 ///     // with a slice per day.
279 ///     let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
280 ///     for day in assigned_days.iter() {
281 ///         let last = day.last().unwrap();
282 ///         let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
283 ///         let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
284 ///         days.push((work_hours - last.sweep + last.cleanup, names));
285 ///     }
286 ///     days
287 /// }
288 ///
289 /// // With a single crew working 8 hours a day:
290 /// assert_eq!(
291 ///     assign_days(&tasks, 8.0),
292 ///     [
293 ///         (7.0, vec!["Foundation"]),
294 ///         (8.0, vec!["Framing", "Plumbing"]),
295 ///         (7.0, vec!["Electrical", "Insulation"]),
296 ///         (5.0, vec!["Drywall"]),
297 ///         (7.0, vec!["Floors", "Countertops"]),
298 ///         (4.0, vec!["Bathrooms"]),
299 ///     ]
300 /// );
301 ///
302 /// // With two crews working in shifts, 16 hours a day:
303 /// assert_eq!(
304 ///     assign_days(&tasks, 16.0),
305 ///     [
306 ///         (14.0, vec!["Foundation", "Framing", "Plumbing"]),
307 ///         (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
308 ///         (6.0, vec!["Countertops", "Bathrooms"]),
309 ///     ]
310 /// );
311 /// ```
312 ///
313 /// Apologies to anyone who actually knows how to build a house and
314 /// knows how long each step takes :-)
wrap_first_fit<'a, 'b, T: Fragment>( fragments: &'a [T], line_widths: &'b [f64], ) -> Vec<&'a [T]>315 pub fn wrap_first_fit<'a, 'b, T: Fragment>(
316     fragments: &'a [T],
317     line_widths: &'b [f64],
318 ) -> Vec<&'a [T]> {
319     // The final line width is used for all remaining lines.
320     let default_line_width = line_widths.last().copied().unwrap_or(0.0);
321     let mut lines = Vec::new();
322     let mut start = 0;
323     let mut width = 0.0;
324 
325     for (idx, fragment) in fragments.iter().enumerate() {
326         let line_width = line_widths
327             .get(lines.len())
328             .copied()
329             .unwrap_or(default_line_width);
330         if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
331             lines.push(&fragments[start..idx]);
332             start = idx;
333             width = 0.0;
334         }
335         width += fragment.width() + fragment.whitespace_width();
336     }
337     lines.push(&fragments[start..]);
338     lines
339 }
340 
341 #[cfg(test)]
342 mod tests {
343     use super::*;
344 
345     #[derive(Debug, PartialEq)]
346     struct Word(f64);
347 
348     #[rustfmt::skip]
349     impl Fragment for Word {
width(&self) -> f64350         fn width(&self) -> f64 { self.0 }
whitespace_width(&self) -> f64351         fn whitespace_width(&self) -> f64 { 1.0 }
penalty_width(&self) -> f64352         fn penalty_width(&self) -> f64 { 0.0 }
353     }
354 
355     #[test]
wrap_string_longer_than_f64()356     fn wrap_string_longer_than_f64() {
357         let words = vec![
358             Word(1e307),
359             Word(2e307),
360             Word(3e307),
361             Word(4e307),
362             Word(5e307),
363             Word(6e307),
364         ];
365         // Wrap at just under f64::MAX (~19e307). The tiny
366         // whitespace_widths disappear because of loss of precision.
367         assert_eq!(
368             wrap_first_fit(&words, &[15e307]),
369             &[
370                 vec![
371                     Word(1e307),
372                     Word(2e307),
373                     Word(3e307),
374                     Word(4e307),
375                     Word(5e307)
376                 ],
377                 vec![Word(6e307)]
378             ]
379         );
380     }
381 }
382