1 //! Word wrapping algorithms.
2 //!
3 //! After a text has been broken into words (or [`Fragment`]s), one
4 //! now has to decide how to break the fragments into lines. The
5 //! simplest algorithm for this is implemented by [`wrap_first_fit`]:
6 //! it uses no look-ahead and simply adds fragments to the line as
7 //! long as they fit. However, this can lead to poor line breaks if a
8 //! large fragment almost-but-not-quite fits on a line. When that
9 //! happens, the fragment is moved to the next line and it will leave
10 //! behind a large gap. A more advanced algorithm, implemented by
11 //! [`wrap_optimal_fit`], will take this into account. The optimal-fit
12 //! algorithm considers all possible line breaks and will attempt to
13 //! minimize the gaps left behind by overly short lines.
14 //!
15 //! While both algorithms run in linear time, the first-fit algorithm
16 //! is about 4 times faster than the optimal-fit algorithm.
17
18 #[cfg(feature = "smawk")]
19 mod optimal_fit;
20 #[cfg(feature = "smawk")]
21 pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};
22
23 use crate::core::{Fragment, Word};
24
25 /// Describes how to wrap words into lines.
26 ///
27 /// The simplest approach is to wrap words one word at a time and
28 /// accept the first way of wrapping which fit
29 /// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
30 /// enabled, a more complex algorithm is available which will look at
31 /// an entire paragraph at a time in order to find optimal line breaks
32 /// ([`WrapAlgorithm::OptimalFit`]).
33 #[derive(Clone, Copy)]
34 pub enum WrapAlgorithm {
35 /// Wrap words using a fast and simple algorithm.
36 ///
37 /// This algorithm uses no look-ahead when finding line breaks.
38 /// Implemented by [`wrap_first_fit`], please see that function for
39 /// details and examples.
40 FirstFit,
41
42 /// Wrap words using an advanced algorithm with look-ahead.
43 ///
44 /// This wrapping algorithm considers the entire paragraph to find
45 /// optimal line breaks. When wrapping text, "penalties" are
46 /// assigned to line breaks based on the gaps left at the end of
47 /// lines. See [`Penalties`] for details.
48 ///
49 /// The underlying wrapping algorithm is implemented by
50 /// [`wrap_optimal_fit`], please see that function for examples.
51 ///
52 /// **Note:** Only available when the `smawk` Cargo feature is
53 /// enabled.
54 #[cfg(feature = "smawk")]
55 OptimalFit(Penalties),
56
57 /// Custom wrapping function.
58 ///
59 /// Use this if you want to implement your own wrapping algorithm.
60 /// The function can freely decide how to turn a slice of
61 /// [`Word`]s into lines.
62 ///
63 /// # Example
64 ///
65 /// ```
66 /// use textwrap::core::Word;
67 /// use textwrap::{wrap, Options, WrapAlgorithm};
68 ///
69 /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
70 /// let mut lines = Vec::new();
71 /// let mut step = 1;
72 /// let mut start_idx = 0;
73 /// while start_idx + step <= words.len() {
74 /// lines.push(&words[start_idx .. start_idx+step]);
75 /// start_idx += step;
76 /// step += 1;
77 /// }
78 /// lines
79 /// }
80 ///
81 /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
82 /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
83 /// vec!["First,",
84 /// "second, third,",
85 /// "fourth, fifth, sixth"]);
86 /// ```
87 Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
88 }
89
90 impl std::fmt::Debug for WrapAlgorithm {
fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result91 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
92 match self {
93 WrapAlgorithm::FirstFit => f.write_str("FirstFit"),
94 #[cfg(feature = "smawk")]
95 WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties),
96 WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"),
97 }
98 }
99 }
100
101 impl WrapAlgorithm {
102 /// Create new wrap algorithm.
103 ///
104 /// The best wrapping algorithm is used by default, i.e.,
105 /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
106 /// [`WrapAlgorithm::FirstFit`].
new() -> Self107 pub const fn new() -> Self {
108 #[cfg(not(feature = "smawk"))]
109 {
110 WrapAlgorithm::FirstFit
111 }
112
113 #[cfg(feature = "smawk")]
114 {
115 WrapAlgorithm::new_optimal_fit()
116 }
117 }
118
119 /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
120 /// works well for monospace text.
121 ///
122 /// **Note:** Only available when the `smawk` Cargo feature is
123 /// enabled.
124 #[cfg(feature = "smawk")]
new_optimal_fit() -> Self125 pub const fn new_optimal_fit() -> Self {
126 WrapAlgorithm::OptimalFit(Penalties::new())
127 }
128
129 /// Wrap words according to line widths.
130 ///
131 /// The `line_widths` slice gives the target line width for each
132 /// line (the last slice element is repeated as necessary). This
133 /// can be used to implement hanging indentation.
134 #[inline]
wrap<'a, 'b>( &self, words: &'b [Word<'a>], line_widths: &'b [usize], ) -> Vec<&'b [Word<'a>]>135 pub fn wrap<'a, 'b>(
136 &self,
137 words: &'b [Word<'a>],
138 line_widths: &'b [usize],
139 ) -> Vec<&'b [Word<'a>]> {
140 // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
141 // = 9_007_199_254_740_992 can be represented without loss by
142 // a f64. Larger line widths will be rounded to the nearest
143 // representable number.
144 let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();
145
146 match self {
147 WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),
148
149 #[cfg(feature = "smawk")]
150 WrapAlgorithm::OptimalFit(penalties) => {
151 // The computation cannnot overflow when the line
152 // widths are restricted to usize.
153 wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
154 }
155
156 WrapAlgorithm::Custom(func) => func(words, line_widths),
157 }
158 }
159 }
160
161 impl Default for WrapAlgorithm {
default() -> Self162 fn default() -> Self {
163 WrapAlgorithm::new()
164 }
165 }
166
167 /// Wrap abstract fragments into lines with a first-fit algorithm.
168 ///
169 /// The `line_widths` slice gives the target line width for each line
170 /// (the last slice element is repeated as necessary). This can be
171 /// used to implement hanging indentation.
172 ///
173 /// The fragments must already have been split into the desired
174 /// widths, this function will not (and cannot) attempt to split them
175 /// further when arranging them into lines.
176 ///
177 /// # First-Fit Algorithm
178 ///
179 /// This implements a simple “greedy” algorithm: accumulate fragments
180 /// one by one and when a fragment no longer fits, start a new line.
181 /// There is no look-ahead, we simply take first fit of the fragments
182 /// we find.
183 ///
184 /// While fast and predictable, this algorithm can produce poor line
185 /// breaks when a long fragment is moved to a new line, leaving behind
186 /// a large gap:
187 ///
188 /// ```
189 /// use textwrap::core::Word;
190 /// use textwrap::wrap_algorithms::wrap_first_fit;
191 /// use textwrap::WordSeparator;
192 ///
193 /// // Helper to convert wrapped lines to a Vec<String>.
194 /// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
195 /// lines.iter().map(|line| {
196 /// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
197 /// }).collect::<Vec<_>>()
198 /// }
199 ///
200 /// let text = "These few words will unfortunately not wrap nicely.";
201 /// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
202 /// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
203 /// vec!["These few words",
204 /// "will", // <-- short line
205 /// "unfortunately",
206 /// "not wrap",
207 /// "nicely."]);
208 ///
209 /// // We can avoid the short line if we look ahead:
210 /// #[cfg(feature = "smawk")]
211 /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
212 /// #[cfg(feature = "smawk")]
213 /// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
214 /// vec!["These few",
215 /// "words will",
216 /// "unfortunately",
217 /// "not wrap",
218 /// "nicely."]);
219 /// ```
220 ///
221 /// The [`wrap_optimal_fit`] function was used above to get better
222 /// line breaks. It uses an advanced algorithm which tries to avoid
223 /// short lines. This function is about 4 times faster than
224 /// [`wrap_optimal_fit`].
225 ///
226 /// # Examples
227 ///
228 /// Imagine you're building a house site and you have a number of
229 /// tasks you need to execute. Things like pour foundation, complete
230 /// framing, install plumbing, electric cabling, install insulation.
231 ///
232 /// The construction workers can only work during daytime, so they
233 /// need to pack up everything at night. Because they need to secure
234 /// their tools and move machines back to the garage, this process
235 /// takes much more time than the time it would take them to simply
236 /// switch to another task.
237 ///
238 /// You would like to make a list of tasks to execute every day based
239 /// on your estimates. You can model this with a program like this:
240 ///
241 /// ```
242 /// use textwrap::core::{Fragment, Word};
243 /// use textwrap::wrap_algorithms::wrap_first_fit;
244 ///
245 /// #[derive(Debug)]
246 /// struct Task<'a> {
247 /// name: &'a str,
248 /// hours: f64, // Time needed to complete task.
249 /// sweep: f64, // Time needed for a quick sweep after task during the day.
250 /// cleanup: f64, // Time needed for full cleanup if day ends with this task.
251 /// }
252 ///
253 /// impl Fragment for Task<'_> {
254 /// fn width(&self) -> f64 { self.hours }
255 /// fn whitespace_width(&self) -> f64 { self.sweep }
256 /// fn penalty_width(&self) -> f64 { self.cleanup }
257 /// }
258 ///
259 /// // The morning tasks
260 /// let tasks = vec![
261 /// Task { name: "Foundation", hours: 4.0, sweep: 2.0, cleanup: 3.0 },
262 /// Task { name: "Framing", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
263 /// Task { name: "Plumbing", hours: 2.0, sweep: 2.0, cleanup: 2.0 },
264 /// Task { name: "Electrical", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
265 /// Task { name: "Insulation", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
266 /// Task { name: "Drywall", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
267 /// Task { name: "Floors", hours: 3.0, sweep: 1.0, cleanup: 2.0 },
268 /// Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
269 /// Task { name: "Bathrooms", hours: 2.0, sweep: 1.0, cleanup: 2.0 },
270 /// ];
271 ///
272 /// // Fill tasks into days, taking `day_length` into account. The
273 /// // output shows the hours worked per day along with the names of
274 /// // the tasks for that day.
275 /// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
276 /// let mut days = Vec::new();
277 /// // Assign tasks to days. The assignment is a vector of slices,
278 /// // with a slice per day.
279 /// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
280 /// for day in assigned_days.iter() {
281 /// let last = day.last().unwrap();
282 /// let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
283 /// let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
284 /// days.push((work_hours - last.sweep + last.cleanup, names));
285 /// }
286 /// days
287 /// }
288 ///
289 /// // With a single crew working 8 hours a day:
290 /// assert_eq!(
291 /// assign_days(&tasks, 8.0),
292 /// [
293 /// (7.0, vec!["Foundation"]),
294 /// (8.0, vec!["Framing", "Plumbing"]),
295 /// (7.0, vec!["Electrical", "Insulation"]),
296 /// (5.0, vec!["Drywall"]),
297 /// (7.0, vec!["Floors", "Countertops"]),
298 /// (4.0, vec!["Bathrooms"]),
299 /// ]
300 /// );
301 ///
302 /// // With two crews working in shifts, 16 hours a day:
303 /// assert_eq!(
304 /// assign_days(&tasks, 16.0),
305 /// [
306 /// (14.0, vec!["Foundation", "Framing", "Plumbing"]),
307 /// (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
308 /// (6.0, vec!["Countertops", "Bathrooms"]),
309 /// ]
310 /// );
311 /// ```
312 ///
313 /// Apologies to anyone who actually knows how to build a house and
314 /// knows how long each step takes :-)
wrap_first_fit<'a, 'b, T: Fragment>( fragments: &'a [T], line_widths: &'b [f64], ) -> Vec<&'a [T]>315 pub fn wrap_first_fit<'a, 'b, T: Fragment>(
316 fragments: &'a [T],
317 line_widths: &'b [f64],
318 ) -> Vec<&'a [T]> {
319 // The final line width is used for all remaining lines.
320 let default_line_width = line_widths.last().copied().unwrap_or(0.0);
321 let mut lines = Vec::new();
322 let mut start = 0;
323 let mut width = 0.0;
324
325 for (idx, fragment) in fragments.iter().enumerate() {
326 let line_width = line_widths
327 .get(lines.len())
328 .copied()
329 .unwrap_or(default_line_width);
330 if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
331 lines.push(&fragments[start..idx]);
332 start = idx;
333 width = 0.0;
334 }
335 width += fragment.width() + fragment.whitespace_width();
336 }
337 lines.push(&fragments[start..]);
338 lines
339 }
340
341 #[cfg(test)]
342 mod tests {
343 use super::*;
344
345 #[derive(Debug, PartialEq)]
346 struct Word(f64);
347
348 #[rustfmt::skip]
349 impl Fragment for Word {
width(&self) -> f64350 fn width(&self) -> f64 { self.0 }
whitespace_width(&self) -> f64351 fn whitespace_width(&self) -> f64 { 1.0 }
penalty_width(&self) -> f64352 fn penalty_width(&self) -> f64 { 0.0 }
353 }
354
355 #[test]
wrap_string_longer_than_f64()356 fn wrap_string_longer_than_f64() {
357 let words = vec![
358 Word(1e307),
359 Word(2e307),
360 Word(3e307),
361 Word(4e307),
362 Word(5e307),
363 Word(6e307),
364 ];
365 // Wrap at just under f64::MAX (~19e307). The tiny
366 // whitespace_widths disappear because of loss of precision.
367 assert_eq!(
368 wrap_first_fit(&words, &[15e307]),
369 &[
370 vec![
371 Word(1e307),
372 Word(2e307),
373 Word(3e307),
374 Word(4e307),
375 Word(5e307)
376 ],
377 vec![Word(6e307)]
378 ]
379 );
380 }
381 }
382