• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::cell::RefCell;
2 
3 use crate::core::Fragment;
4 
5 /// Penalties for
6 /// [`WrapAlgorithm::OptimalFit`](crate::WrapAlgorithm::OptimalFit)
7 /// and [`wrap_optimal_fit`].
8 ///
9 /// This wrapping algorithm in [`wrap_optimal_fit`] considers the
10 /// entire paragraph to find optimal line breaks. When wrapping text,
11 /// "penalties" are assigned to line breaks based on the gaps left at
12 /// the end of lines. The penalties are given by this struct, with
13 /// [`Penalties::default`] assigning penalties that work well for
14 /// monospace text.
15 ///
16 /// If you are wrapping proportional text, you are advised to assign
17 /// your own penalties according to your font size. See the individual
18 /// penalties below for details.
19 ///
20 /// **Note:** Only available when the `smawk` Cargo feature is
21 /// enabled.
22 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
23 pub struct Penalties {
24     /// Per-line penalty. This is added for every line, which makes it
25     /// expensive to output more lines than the minimum required.
26     pub nline_penalty: usize,
27 
28     /// Per-character cost for lines that overflow the target line width.
29     ///
30     /// With a default value of 50², every single character costs as
31     /// much as leaving a gap of 50 characters behind. This is because
32     /// we assign as cost of `gap * gap` to a short line. When
33     /// wrapping monospace text, we can overflow the line by 1
34     /// character in extreme cases:
35     ///
36     /// ```
37     /// use textwrap::core::Word;
38     /// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
39     ///
40     /// let short = "foo ";
41     /// let long = "x".repeat(50);
42     /// let length = (short.len() + long.len()) as f64;
43     /// let fragments = vec![Word::from(short), Word::from(&long)];
44     /// let penalties = Penalties::new();
45     ///
46     /// // Perfect fit, both words are on a single line with no overflow.
47     /// let wrapped = wrap_optimal_fit(&fragments, &[length], &penalties).unwrap();
48     /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
49     ///
50     /// // The words no longer fit, yet we get a single line back. While
51     /// // the cost of overflow (`1 * 2500`) is the same as the cost of the
52     /// // gap (`50 * 50 = 2500`), the tie is broken by `nline_penalty`
53     /// // which makes it cheaper to overflow than to use two lines.
54     /// let wrapped = wrap_optimal_fit(&fragments, &[length - 1.0], &penalties).unwrap();
55     /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
56     ///
57     /// // The cost of overflow would be 2 * 2500, whereas the cost of
58     /// // the gap is only `49 * 49 + nline_penalty = 2401 + 1000 =
59     /// // 3401`. We therefore get two lines.
60     /// let wrapped = wrap_optimal_fit(&fragments, &[length - 2.0], &penalties).unwrap();
61     /// assert_eq!(wrapped, vec![&[Word::from(short)],
62     ///                          &[Word::from(&long)]]);
63     /// ```
64     ///
65     /// This only happens if the overflowing word is 50 characters
66     /// long _and_ if the word overflows the line by exactly one
67     /// character. If it overflows by more than one character, the
68     /// overflow penalty will quickly outgrow the cost of the gap, as
69     /// seen above.
70     pub overflow_penalty: usize,
71 
72     /// When should the a single word on the last line be considered
73     /// "too short"?
74     ///
75     /// If the last line of the text consist of a single word and if
76     /// this word is shorter than `1 / short_last_line_fraction` of
77     /// the line width, then the final line will be considered "short"
78     /// and `short_last_line_penalty` is added as an extra penalty.
79     ///
80     /// The effect of this is to avoid a final line consisting of a
81     /// single small word. For example, with a
82     /// `short_last_line_penalty` of 25 (the default), a gap of up to
83     /// 5 columns will be seen as more desirable than having a final
84     /// short line.
85     ///
86     /// ## Examples
87     ///
88     /// ```
89     /// use textwrap::{wrap, wrap_algorithms, Options, WrapAlgorithm};
90     ///
91     /// let text = "This is a demo of the short last line penalty.";
92     ///
93     /// // The first-fit algorithm leaves a single short word on the last line:
94     /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::FirstFit)),
95     ///            vec!["This is a demo of the short last line",
96     ///                 "penalty."]);
97     ///
98     /// #[cfg(feature = "smawk")] {
99     /// let mut penalties = wrap_algorithms::Penalties::new();
100     ///
101     /// // Since "penalty." is shorter than 25% of the line width, the
102     /// // optimal-fit algorithm adds a penalty of 25. This is enough
103     /// // to move "line " down:
104     /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
105     ///            vec!["This is a demo of the short last",
106     ///                 "line penalty."]);
107     ///
108     /// // We can change the meaning of "short" lines. Here, only words
109     /// // shorter than 1/10th of the line width will be considered short:
110     /// penalties.short_last_line_fraction = 10;
111     /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
112     ///            vec!["This is a demo of the short last line",
113     ///                 "penalty."]);
114     ///
115     /// // If desired, the penalty can also be disabled:
116     /// penalties.short_last_line_fraction = 4;
117     /// penalties.short_last_line_penalty = 0;
118     /// assert_eq!(wrap(text, Options::new(37).wrap_algorithm(WrapAlgorithm::OptimalFit(penalties))),
119     ///            vec!["This is a demo of the short last line",
120     ///                 "penalty."]);
121     /// }
122     /// ```
123     pub short_last_line_fraction: usize,
124 
125     /// Penalty for a last line with a single short word.
126     ///
127     /// Set this to zero if you do not want to penalize short last lines.
128     pub short_last_line_penalty: usize,
129 
130     /// Penalty for lines ending with a hyphen.
131     pub hyphen_penalty: usize,
132 }
133 
134 impl Penalties {
135     /// Default penalties for monospace text.
136     ///
137     /// The penalties here work well for monospace text. This is
138     /// because they expect the gaps at the end of lines to be roughly
139     /// in the range `0..100`. If the gaps are larger, the
140     /// `overflow_penalty` and `hyphen_penalty` become insignificant.
new() -> Self141     pub const fn new() -> Self {
142         Penalties {
143             nline_penalty: 1000,
144             overflow_penalty: 50 * 50,
145             short_last_line_fraction: 4,
146             short_last_line_penalty: 25,
147             hyphen_penalty: 25,
148         }
149     }
150 }
151 
152 impl Default for Penalties {
default() -> Self153     fn default() -> Self {
154         Self::new()
155     }
156 }
157 
158 /// Cache for line numbers. This is necessary to avoid a O(n**2)
159 /// behavior when computing line numbers in [`wrap_optimal_fit`].
160 struct LineNumbers {
161     line_numbers: RefCell<Vec<usize>>,
162 }
163 
164 impl LineNumbers {
new(size: usize) -> Self165     fn new(size: usize) -> Self {
166         let mut line_numbers = Vec::with_capacity(size);
167         line_numbers.push(0);
168         LineNumbers {
169             line_numbers: RefCell::new(line_numbers),
170         }
171     }
172 
get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize173     fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
174         while self.line_numbers.borrow_mut().len() < i + 1 {
175             let pos = self.line_numbers.borrow().len();
176             let line_number = 1 + self.get(minima[pos].0, minima);
177             self.line_numbers.borrow_mut().push(line_number);
178         }
179 
180         self.line_numbers.borrow()[i]
181     }
182 }
183 
184 /// Overflow error during the [`wrap_optimal_fit`] computation.
185 #[derive(Debug, PartialEq, Eq)]
186 pub struct OverflowError;
187 
188 impl std::fmt::Display for OverflowError {
fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result189     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
190         write!(f, "wrap_optimal_fit cost computation overflowed")
191     }
192 }
193 
194 impl std::error::Error for OverflowError {}
195 
196 /// Wrap abstract fragments into lines with an optimal-fit algorithm.
197 ///
198 /// The `line_widths` slice gives the target line width for each line
199 /// (the last slice element is repeated as necessary). This can be
200 /// used to implement hanging indentation.
201 ///
202 /// The fragments must already have been split into the desired
203 /// widths, this function will not (and cannot) attempt to split them
204 /// further when arranging them into lines.
205 ///
206 /// # Optimal-Fit Algorithm
207 ///
208 /// The algorithm considers all possible break points and picks the
209 /// breaks which minimizes the gaps at the end of each line. More
210 /// precisely, the algorithm assigns a cost or penalty to each break
211 /// point, determined by `cost = gap * gap` where `gap = target_width -
212 /// line_width`. Shorter lines are thus penalized more heavily since
213 /// they leave behind a larger gap.
214 ///
215 /// We can illustrate this with the text “To be, or not to be: that is
216 /// the question”. We will be wrapping it in a narrow column with room
217 /// for only 10 characters. The [greedy
218 /// algorithm](super::wrap_first_fit) will produce these lines, each
219 /// annotated with the corresponding penalty:
220 ///
221 /// ```text
222 /// "To be, or"   1² =  1
223 /// "not to be:"  0² =  0
224 /// "that is"     3² =  9
225 /// "the"         7² = 49
226 /// "question"    2² =  4
227 /// ```
228 ///
229 /// We see that line four with “the” leaves a gap of 7 columns, which
230 /// gives it a penalty of 49. The sum of the penalties is 63.
231 ///
232 /// There are 10 words, which means that there are `2_u32.pow(9)` or
233 /// 512 different ways to typeset it. We can compute
234 /// the sum of the penalties for each possible line break and search
235 /// for the one with the lowest sum:
236 ///
237 /// ```text
238 /// "To be,"     4² = 16
239 /// "or not to"  1² =  1
240 /// "be: that"   2² =  4
241 /// "is the"     4² = 16
242 /// "question"   2² =  4
243 /// ```
244 ///
245 /// The sum of the penalties is 41, which is better than what the
246 /// greedy algorithm produced.
247 ///
248 /// Searching through all possible combinations would normally be
249 /// prohibitively slow. However, it turns out that the problem can be
250 /// formulated as the task of finding column minima in a cost matrix.
251 /// This matrix has a special form (totally monotone) which lets us
252 /// use a [linear-time algorithm called
253 /// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
254 /// points.
255 ///
256 /// This means that the time complexity remains O(_n_) where _n_ is
257 /// the number of words. Compared to
258 /// [`wrap_first_fit()`](super::wrap_first_fit), this function is
259 /// about 4 times slower.
260 ///
261 /// The optimization of per-line costs over the entire paragraph is
262 /// inspired by the line breaking algorithm used in TeX, as described
263 /// in the 1981 article [_Breaking Paragraphs into
264 /// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)
265 /// by Knuth and Plass. The implementation here is based on [Python
266 /// code by David
267 /// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
268 ///
269 /// # Errors
270 ///
271 /// In case of an overflow during the cost computation, an `Err` is
272 /// returned. Overflows happens when fragments or lines have infinite
273 /// widths (`f64::INFINITY`) or if the widths are so large that the
274 /// gaps at the end of lines have sizes larger than `f64::MAX.sqrt()`
275 /// (approximately 1e154):
276 ///
277 /// ```
278 /// use textwrap::core::Fragment;
279 /// use textwrap::wrap_algorithms::{wrap_optimal_fit, OverflowError, Penalties};
280 ///
281 /// #[derive(Debug, PartialEq)]
282 /// struct Word(f64);
283 ///
284 /// impl Fragment for Word {
285 ///     fn width(&self) -> f64 { self.0 }
286 ///     fn whitespace_width(&self) -> f64 { 1.0 }
287 ///     fn penalty_width(&self) -> f64 { 0.0 }
288 /// }
289 ///
290 /// // Wrapping overflows because 1e155 * 1e155 = 1e310, which is
291 /// // larger than f64::MAX:
292 /// assert_eq!(wrap_optimal_fit(&[Word(0.0), Word(0.0)], &[1e155], &Penalties::default()),
293 ///            Err(OverflowError));
294 /// ```
295 ///
296 /// When using fragment widths and line widths which fit inside an
297 /// `u64`, overflows cannot happen. This means that fragments derived
298 /// from a `&str` cannot cause overflows.
299 ///
300 /// **Note:** Only available when the `smawk` Cargo feature is
301 /// enabled.
wrap_optimal_fit<'a, 'b, T: Fragment>( fragments: &'a [T], line_widths: &'b [f64], penalties: &'b Penalties, ) -> Result<Vec<&'a [T]>, OverflowError>302 pub fn wrap_optimal_fit<'a, 'b, T: Fragment>(
303     fragments: &'a [T],
304     line_widths: &'b [f64],
305     penalties: &'b Penalties,
306 ) -> Result<Vec<&'a [T]>, OverflowError> {
307     // The final line width is used for all remaining lines.
308     let default_line_width = line_widths.last().copied().unwrap_or(0.0);
309     let mut widths = Vec::with_capacity(fragments.len() + 1);
310     let mut width = 0.0;
311     widths.push(width);
312     for fragment in fragments {
313         width += fragment.width() + fragment.whitespace_width();
314         widths.push(width);
315     }
316 
317     let line_numbers = LineNumbers::new(fragments.len());
318 
319     let minima = smawk::online_column_minima(0.0, widths.len(), |minima, i, j| {
320         // Line number for fragment `i`.
321         let line_number = line_numbers.get(i, minima);
322         let line_width = line_widths
323             .get(line_number)
324             .copied()
325             .unwrap_or(default_line_width);
326         let target_width = line_width.max(1.0);
327 
328         // Compute the width of a line spanning fragments[i..j] in
329         // constant time. We need to adjust widths[j] by subtracting
330         // the whitespace of fragment[j-1] and then add the penalty.
331         let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
332             + fragments[j - 1].penalty_width();
333 
334         // We compute cost of the line containing fragments[i..j]. We
335         // start with values[i].1, which is the optimal cost for
336         // breaking before fragments[i].
337         //
338         // First, every extra line cost NLINE_PENALTY.
339         let mut cost = minima[i].1 + penalties.nline_penalty as f64;
340 
341         // Next, we add a penalty depending on the line length.
342         if line_width > target_width {
343             // Lines that overflow get a hefty penalty.
344             let overflow = line_width - target_width;
345             cost += overflow * penalties.overflow_penalty as f64;
346         } else if j < fragments.len() {
347             // Other lines (except for the last line) get a milder
348             // penalty which depend on the size of the gap.
349             let gap = target_width - line_width;
350             cost += gap * gap;
351         } else if i + 1 == j
352             && line_width < target_width / penalties.short_last_line_fraction as f64
353         {
354             // The last line can have any size gap, but we do add a
355             // penalty if the line is very short (typically because it
356             // contains just a single word).
357             cost += penalties.short_last_line_penalty as f64;
358         }
359 
360         // Finally, we discourage hyphens.
361         if fragments[j - 1].penalty_width() > 0.0 {
362             // TODO: this should use a penalty value from the fragment
363             // instead.
364             cost += penalties.hyphen_penalty as f64;
365         }
366 
367         cost
368     });
369 
370     for (_, cost) in &minima {
371         if cost.is_infinite() {
372             return Err(OverflowError);
373         }
374     }
375 
376     let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
377     let mut pos = fragments.len();
378     loop {
379         let prev = minima[pos].0;
380         lines.push(&fragments[prev..pos]);
381         pos = prev;
382         if pos == 0 {
383             break;
384         }
385     }
386 
387     lines.reverse();
388     Ok(lines)
389 }
390 
391 #[cfg(test)]
392 mod tests {
393     use super::*;
394 
395     #[derive(Debug, PartialEq)]
396     struct Word(f64);
397 
398     #[rustfmt::skip]
399     impl Fragment for Word {
width(&self) -> f64400         fn width(&self) -> f64 { self.0 }
whitespace_width(&self) -> f64401         fn whitespace_width(&self) -> f64 { 1.0 }
penalty_width(&self) -> f64402         fn penalty_width(&self) -> f64 { 0.0 }
403     }
404 
405     #[test]
wrap_fragments_with_infinite_widths()406     fn wrap_fragments_with_infinite_widths() {
407         let words = vec![Word(f64::INFINITY)];
408         assert_eq!(
409             wrap_optimal_fit(&words, &[0.0], &Penalties::default()),
410             Err(OverflowError)
411         );
412     }
413 
414     #[test]
wrap_fragments_with_huge_widths()415     fn wrap_fragments_with_huge_widths() {
416         let words = vec![Word(1e200), Word(1e250), Word(1e300)];
417         assert_eq!(
418             wrap_optimal_fit(&words, &[1e300], &Penalties::default()),
419             Err(OverflowError)
420         );
421     }
422 
423     #[test]
wrap_fragments_with_large_widths()424     fn wrap_fragments_with_large_widths() {
425         // The gaps will be of the sizes between 1e25 and 1e75. This
426         // makes the `gap * gap` cost fit comfortably in a f64.
427         let words = vec![Word(1e25), Word(1e50), Word(1e75)];
428         assert_eq!(
429             wrap_optimal_fit(&words, &[1e100], &Penalties::default()),
430             Ok(vec![&vec![Word(1e25), Word(1e50), Word(1e75)][..]])
431         );
432     }
433 }
434