• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Word wrapping algorithms.
2 //!
3 //! After a text has been broken into words (or [`Fragment`]s), one
4 //! now has to decide how to break the fragments into lines. The
5 //! simplest algorithm for this is implemented by
6 //! [`wrap_first_fit()`]: it uses no look-ahead and simply adds
7 //! fragments to the line as long as they fit. However, this can lead
8 //! to poor line breaks if a large fragment almost-but-not-quite fits
9 //! on a line. When that happens, the fragment is moved to the next
10 //! line and it will leave behind a large gap.
11 //!
12 //! A more advanced algorithm, implemented by [`wrap_optimal_fit()`],
13 //! will take this into account. The optimal-fit algorithm considers
14 //! all possible line breaks and will attempt to minimize the gaps
15 //! left behind by overly short lines.
16 //!
17 //! While both algorithms run in linear time, the first-fit algorithm
18 //! is about 4 times faster than the optimal-fit algorithm.
19 
20 #[cfg(feature = "smawk")]
21 mod optimal_fit;
22 #[cfg(feature = "smawk")]
23 pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
24 
25 use crate::core::{Fragment, Word};
26 
27 /// Describes how to wrap words into lines.
28 ///
29 /// The simplest approach is to wrap words one word at a time and
30 /// accept the first way of wrapping which fit
31 /// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
32 /// enabled, a more complex algorithm is available which will look at
33 /// an entire paragraph at a time in order to find optimal line breaks
34 /// ([`WrapAlgorithm::OptimalFit`]).
35 #[derive(Clone, Copy, Debug)]
36 pub enum WrapAlgorithm {
37     /// Wrap words using a fast and simple algorithm.
38     ///
39     /// This algorithm uses no look-ahead when finding line breaks.
40     /// Implemented by [`wrap_first_fit()`], please see that function
41     /// for details and examples.
42     FirstFit,
43 
44     /// Wrap words using an advanced algorithm with look-ahead.
45     ///
46     /// This wrapping algorithm considers the entire paragraph to find
47     /// optimal line breaks. When wrapping text, "penalties" are
48     /// assigned to line breaks based on the gaps left at the end of
49     /// lines. See [`Penalties`] for details.
50     ///
51     /// The underlying wrapping algorithm is implemented by
52     /// [`wrap_optimal_fit()`], please see that function for examples.
53     ///
54     /// **Note:** Only available when the `smawk` Cargo feature is
55     /// enabled.
56     #[cfg(feature = "smawk")]
57     OptimalFit(Penalties),
58 
59     /// Custom wrapping function.
60     ///
61     /// Use this if you want to implement your own wrapping algorithm.
62     /// The function can freely decide how to turn a slice of
63     /// [`Word`]s into lines.
64     ///
65     /// # Example
66     ///
67     /// ```
68     /// use textwrap::core::Word;
69     /// use textwrap::{wrap, Options, WrapAlgorithm};
70     ///
71     /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
72     ///     let mut lines = Vec::new();
73     ///     let mut step = 1;
74     ///     let mut start_idx = 0;
75     ///     while start_idx + step <= words.len() {
76     ///       lines.push(&words[start_idx .. start_idx+step]);
77     ///       start_idx += step;
78     ///       step += 1;
79     ///     }
80     ///     lines
81     /// }
82     ///
83     /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
84     /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
85     ///            vec!["First,",
86     ///                 "second, third,",
87     ///                 "fourth, fifth, sixth"]);
88     /// ```
89     Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
90 }
91 
92 impl PartialEq for WrapAlgorithm {
93     /// Compare two wrap algorithms.
94     ///
95     /// ```
96     /// use textwrap::WrapAlgorithm;
97     ///
98     /// assert_eq!(WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit);
99     /// #[cfg(feature = "smawk")] {
100     ///     assert_eq!(WrapAlgorithm::new_optimal_fit(), WrapAlgorithm::new_optimal_fit());
101     /// }
102     /// ```
103     ///
104     /// Note that `WrapAlgorithm::Custom` values never compare equal:
105     ///
106     /// ```
107     /// use textwrap::WrapAlgorithm;
108     ///
109     /// assert_ne!(WrapAlgorithm::Custom(|words, line_widths| vec![words]),
110     ///            WrapAlgorithm::Custom(|words, line_widths| vec![words]));
111     /// ```
eq(&self, other: &Self) -> bool112     fn eq(&self, other: &Self) -> bool {
113         match (self, other) {
114             (WrapAlgorithm::FirstFit, WrapAlgorithm::FirstFit) => true,
115             #[cfg(feature = "smawk")]
116             (WrapAlgorithm::OptimalFit(a), WrapAlgorithm::OptimalFit(b)) => a == b,
117             (_, _) => false,
118         }
119     }
120 }
121 
122 impl WrapAlgorithm {
123     /// Create new wrap algorithm.
124     ///
125     /// The best wrapping algorithm is used by default, i.e.,
126     /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
127     /// [`WrapAlgorithm::FirstFit`].
new() -> Self128     pub const fn new() -> Self {
129         #[cfg(not(feature = "smawk"))]
130         {
131             WrapAlgorithm::FirstFit
132         }
133 
134         #[cfg(feature = "smawk")]
135         {
136             WrapAlgorithm::new_optimal_fit()
137         }
138     }
139 
140     /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
141     /// works well for monospace text.
142     ///
143     /// **Note:** Only available when the `smawk` Cargo feature is
144     /// enabled.
145     #[cfg(feature = "smawk")]
new_optimal_fit() -> Self146     pub const fn new_optimal_fit() -> Self {
147         WrapAlgorithm::OptimalFit(Penalties::new())
148     }
149 
150     /// Wrap words according to line widths.
151     ///
152     /// The `line_widths` slice gives the target line width for each
153     /// line (the last slice element is repeated as necessary). This
154     /// can be used to implement hanging indentation.
155     #[inline]
wrap<'a, 'b>( &self, words: &'b [Word<'a>], line_widths: &'b [usize], ) -> Vec<&'b [Word<'a>]>156     pub fn wrap<'a, 'b>(
157         &self,
158         words: &'b [Word<'a>],
159         line_widths: &'b [usize],
160     ) -> Vec<&'b [Word<'a>]> {
161         // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
162         // = 9_007_199_254_740_992 can be represented without loss by
163         // a f64. Larger line widths will be rounded to the nearest
164         // representable number.
165         let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
166 
167         match self {
168             WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
169 
170             #[cfg(feature = "smawk")]
171             WrapAlgorithm::OptimalFit(penalties) => {
172                 // The computation cannot overflow when the line
173                 // widths are restricted to usize.
174                 wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
175             }
176 
177             WrapAlgorithm::Custom(func) => func(words, line_widths),
178         }
179     }
180 }
181 
182 impl Default for WrapAlgorithm {
default() -> Self183     fn default() -> Self {
184         WrapAlgorithm::new()
185     }
186 }
187 
188 /// Wrap abstract fragments into lines with a first-fit algorithm.
189 ///
190 /// The `line_widths` slice gives the target line width for each line
191 /// (the last slice element is repeated as necessary). This can be
192 /// used to implement hanging indentation.
193 ///
194 /// The fragments must already have been split into the desired
195 /// widths, this function will not (and cannot) attempt to split them
196 /// further when arranging them into lines.
197 ///
198 /// # First-Fit Algorithm
199 ///
200 /// This implements a simple “greedy” algorithm: accumulate fragments
201 /// one by one and when a fragment no longer fits, start a new line.
202 /// There is no look-ahead, we simply take first fit of the fragments
203 /// we find.
204 ///
205 /// While fast and predictable, this algorithm can produce poor line
206 /// breaks when a long fragment is moved to a new line, leaving behind
207 /// a large gap:
208 ///
209 /// ```
210 /// use textwrap::core::Word;
211 /// use textwrap::wrap_algorithms::wrap_first_fit;
212 /// use textwrap::WordSeparator;
213 ///
214 /// // Helper to convert wrapped lines to a Vec<String>.
215 /// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
216 ///     lines.iter().map(|line| {
217 ///         line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
218 ///     }).collect::<Vec<_>>()
219 /// }
220 ///
221 /// let text = "These few words will unfortunately not wrap nicely.";
222 /// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
223 /// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
224 ///            vec!["These few words",
225 ///                 "will",  // <-- short line
226 ///                 "unfortunately",
227 ///                 "not wrap",
228 ///                 "nicely."]);
229 ///
230 /// // We can avoid the short line if we look ahead:
231 /// #[cfg(feature = "smawk")]
232 /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
233 /// #[cfg(feature = "smawk")]
234 /// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
235 ///            vec!["These few",
236 ///                 "words will",
237 ///                 "unfortunately",
238 ///                 "not wrap",
239 ///                 "nicely."]);
240 /// ```
241 ///
242 /// The [`wrap_optimal_fit()`] function was used above to get better
243 /// line breaks. It uses an advanced algorithm which tries to avoid
244 /// short lines. This function is about 4 times faster than
245 /// [`wrap_optimal_fit()`].
246 ///
247 /// # Examples
248 ///
249 /// Imagine you're building a house site and you have a number of
250 /// tasks you need to execute. Things like pour foundation, complete
251 /// framing, install plumbing, electric cabling, install insulation.
252 ///
253 /// The construction workers can only work during daytime, so they
254 /// need to pack up everything at night. Because they need to secure
255 /// their tools and move machines back to the garage, this process
256 /// takes much more time than the time it would take them to simply
257 /// switch to another task.
258 ///
259 /// You would like to make a list of tasks to execute every day based
260 /// on your estimates. You can model this with a program like this:
261 ///
262 /// ```
263 /// use textwrap::core::{Fragment, Word};
264 /// use textwrap::wrap_algorithms::wrap_first_fit;
265 ///
266 /// #[derive(Debug)]
267 /// struct Task<'a> {
268 ///     name: &'a str,
269 ///     hours: f64,   // Time needed to complete task.
270 ///     sweep: f64,   // Time needed for a quick sweep after task during the day.
271 ///     cleanup: f64, // Time needed for full cleanup if day ends with this task.
272 /// }
273 ///
274 /// impl Fragment for Task<'_> {
275 ///     fn width(&self) -> f64 { self.hours }
276 ///     fn whitespace_width(&self) -> f64 { self.sweep }
277 ///     fn penalty_width(&self) -> f64 { self.cleanup }
278 /// }
279 ///
280 /// // The morning tasks
281 /// let tasks = vec![
282 ///     Task { name: "Foundation",  hours: 4.0, sweep: 2.0, cleanup: 3.0 },
283 ///     Task { name: "Framing",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
284 ///     Task { name: "Plumbing",    hours: 2.0, sweep: 2.0, cleanup: 2.0 },
285 ///     Task { name: "Electrical",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
286 ///     Task { name: "Insulation",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
287 ///     Task { name: "Drywall",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
288 ///     Task { name: "Floors",      hours: 3.0, sweep: 1.0, cleanup: 2.0 },
289 ///     Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
290 ///     Task { name: "Bathrooms",   hours: 2.0, sweep: 1.0, cleanup: 2.0 },
291 /// ];
292 ///
293 /// // Fill tasks into days, taking `day_length` into account. The
294 /// // output shows the hours worked per day along with the names of
295 /// // the tasks for that day.
296 /// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
297 ///     let mut days = Vec::new();
298 ///     // Assign tasks to days. The assignment is a vector of slices,
299 ///     // with a slice per day.
300 ///     let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
301 ///     for day in assigned_days.iter() {
302 ///         let last = day.last().unwrap();
303 ///         let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
304 ///         let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
305 ///         days.push((work_hours - last.sweep + last.cleanup, names));
306 ///     }
307 ///     days
308 /// }
309 ///
310 /// // With a single crew working 8 hours a day:
311 /// assert_eq!(
312 ///     assign_days(&tasks, 8.0),
313 ///     [
314 ///         (7.0, vec!["Foundation"]),
315 ///         (8.0, vec!["Framing", "Plumbing"]),
316 ///         (7.0, vec!["Electrical", "Insulation"]),
317 ///         (5.0, vec!["Drywall"]),
318 ///         (7.0, vec!["Floors", "Countertops"]),
319 ///         (4.0, vec!["Bathrooms"]),
320 ///     ]
321 /// );
322 ///
323 /// // With two crews working in shifts, 16 hours a day:
324 /// assert_eq!(
325 ///     assign_days(&tasks, 16.0),
326 ///     [
327 ///         (14.0, vec!["Foundation", "Framing", "Plumbing"]),
328 ///         (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
329 ///         (6.0, vec!["Countertops", "Bathrooms"]),
330 ///     ]
331 /// );
332 /// ```
333 ///
334 /// Apologies to anyone who actually knows how to build a house and
335 /// knows how long each step takes :-)
wrap_first_fit<'a, T: Fragment>(fragments: &'a [T], line_widths: &[f64]) -> Vec<&'a [T]>336 pub fn wrap_first_fit<'a, T: Fragment>(fragments: &'a [T], line_widths: &[f64]) -> Vec<&'a [T]> {
337     // The final line width is used for all remaining lines.
338     let default_line_width = line_widths.last().copied().unwrap_or(0.0);
339     let mut lines = Vec::new();
340     let mut start = 0;
341     let mut width = 0.0;
342 
343     for (idx, fragment) in fragments.iter().enumerate() {
344         let line_width = line_widths
345             .get(lines.len())
346             .copied()
347             .unwrap_or(default_line_width);
348         if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
349             lines.push(&fragments[start..idx]);
350             start = idx;
351             width = 0.0;
352         }
353         width += fragment.width() + fragment.whitespace_width();
354     }
355     lines.push(&fragments[start..]);
356     lines
357 }
358 
359 #[cfg(test)]
360 mod tests {
361     use super::*;
362 
363     #[derive(Debug, PartialEq)]
364     struct Word(f64);
365 
366     #[rustfmt::skip]
367     impl Fragment for Word {
width(&self) -> f64368         fn width(&self) -> f64 { self.0 }
whitespace_width(&self) -> f64369         fn whitespace_width(&self) -> f64 { 1.0 }
penalty_width(&self) -> f64370         fn penalty_width(&self) -> f64 { 0.0 }
371     }
372 
373     #[test]
wrap_string_longer_than_f64()374     fn wrap_string_longer_than_f64() {
375         let words = vec![
376             Word(1e307),
377             Word(2e307),
378             Word(3e307),
379             Word(4e307),
380             Word(5e307),
381             Word(6e307),
382         ];
383         // Wrap at just under f64::MAX (~19e307). The tiny
384         // whitespace_widths disappear because of loss of precision.
385         assert_eq!(
386             wrap_first_fit(&words, &[15e307]),
387             &[
388                 vec![
389                     Word(1e307),
390                     Word(2e307),
391                     Word(3e307),
392                     Word(4e307),
393                     Word(5e307)
394                 ],
395                 vec![Word(6e307)]
396             ]
397         );
398     }
399 }
400