• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use crate::core::Fragment;
2 use std::cell::RefCell;
3 
4 /// Cache for line numbers. This is necessary to avoid a O(n**2)
5 /// behavior when computing line numbers in [`wrap_optimal_fit`].
6 struct LineNumbers {
7     line_numbers: RefCell<Vec<usize>>,
8 }
9 
10 impl LineNumbers {
new(size: usize) -> Self11     fn new(size: usize) -> Self {
12         let mut line_numbers = Vec::with_capacity(size);
13         line_numbers.push(0);
14         LineNumbers {
15             line_numbers: RefCell::new(line_numbers),
16         }
17     }
18 
get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize19     fn get<T>(&self, i: usize, minima: &[(usize, T)]) -> usize {
20         while self.line_numbers.borrow_mut().len() < i + 1 {
21             let pos = self.line_numbers.borrow().len();
22             let line_number = 1 + self.get(minima[pos].0, &minima);
23             self.line_numbers.borrow_mut().push(line_number);
24         }
25 
26         self.line_numbers.borrow()[i]
27     }
28 }
29 
30 /// Per-line penalty. This is added for every line, which makes it
31 /// expensive to output more lines than the minimum required.
32 const NLINE_PENALTY: i32 = 1000;
33 
34 /// Per-character cost for lines that overflow the target line width.
35 ///
36 /// With a value of 50², every single character costs as much as
37 /// leaving a gap of 50 characters behind. This is becuase we assign
38 /// as cost of `gap * gap` to a short line. This means that we can
39 /// overflow the line by 1 character in extreme cases:
40 ///
41 /// ```
42 /// use textwrap::core::{wrap_optimal_fit, Word};
43 ///
44 /// let short = "foo ";
45 /// let long = "x".repeat(50);
46 /// let fragments = vec![Word::from(short), Word::from(&long)];
47 ///
48 /// // Perfect fit, both words are on a single line with no overflow.
49 /// let wrapped = wrap_optimal_fit(&fragments, |_| short.len() + long.len());
50 /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
51 ///
52 /// // The words no longer fit, yet we get a single line back. While
53 /// // the cost of overflow (`1 * 2500`) is the same as the cost of the
54 /// // gap (`50 * 50 = 2500`), the tie is broken by `NLINE_PENALTY`
55 /// // which makes it cheaper to overflow than to use two lines.
56 /// let wrapped = wrap_optimal_fit(&fragments, |_| short.len() + long.len() - 1);
57 /// assert_eq!(wrapped, vec![&[Word::from(short), Word::from(&long)]]);
58 ///
59 /// // The cost of overflow would be 2 * 2500, whereas the cost of the
60 /// // gap is only `49 * 49 + NLINE_PENALTY = 2401 + 1000 = 3401`. We
61 /// // therefore get two lines.
62 /// let wrapped = wrap_optimal_fit(&fragments, |_| short.len() + long.len() - 2);
63 /// assert_eq!(wrapped, vec![&[Word::from(short)],
64 ///                          &[Word::from(&long)]]);
65 /// ```
66 ///
67 /// This only happens if the overflowing word is 50 characters long
68 /// _and_ if it happens to overflow the line by exactly one character.
69 /// If it overflows by more than one character, the overflow penalty
70 /// will quickly outgrow the cost of the gap, as seen above.
71 const OVERFLOW_PENALTY: i32 = 50 * 50;
72 
73 /// The last line is short if it is less than 1/4 of the target width.
74 const SHORT_LINE_FRACTION: usize = 4;
75 
76 /// Penalize a short last line.
77 const SHORT_LAST_LINE_PENALTY: i32 = 25;
78 
79 /// Penalty for lines ending with a hyphen.
80 const HYPHEN_PENALTY: i32 = 25;
81 
82 /// Wrap abstract fragments into lines with an optimal-fit algorithm.
83 ///
84 /// The `line_widths` map line numbers (starting from 0) to a target
85 /// line width. This can be used to implement hanging indentation.
86 ///
87 /// The fragments must already have been split into the desired
88 /// widths, this function will not (and cannot) attempt to split them
89 /// further when arranging them into lines.
90 ///
91 /// # Optimal-Fit Algorithm
92 ///
93 /// The algorithm considers all possible break points and picks the
94 /// breaks which minimizes the gaps at the end of each line. More
95 /// precisely, the algorithm assigns a cost or penalty to each break
96 /// point, determined by `cost = gap * gap` where `gap = target_width -
97 /// line_width`. Shorter lines are thus penalized more heavily since
98 /// they leave behind a larger gap.
99 ///
100 /// We can illustrate this with the text “To be, or not to be: that is
101 /// the question”. We will be wrapping it in a narrow column with room
102 /// for only 10 characters. The [greedy
103 /// algorithm](super::wrap_first_fit) will produce these lines, each
104 /// annotated with the corresponding penalty:
105 ///
106 /// ```text
107 /// "To be, or"   1² =  1
108 /// "not to be:"  0² =  0
109 /// "that is"     3² =  9
110 /// "the"         7² = 49
111 /// "question"    2² =  4
112 /// ```
113 ///
114 /// We see that line four with “the” leaves a gap of 7 columns, which
115 /// gives it a penalty of 49. The sum of the penalties is 63.
116 ///
117 /// There are 10 words, which means that there are `2_u32.pow(9)` or
118 /// 512 different ways to typeset it. We can compute
119 /// the sum of the penalties for each possible line break and search
120 /// for the one with the lowest sum:
121 ///
122 /// ```text
123 /// "To be,"     4² = 16
124 /// "or not to"  1² =  1
125 /// "be: that"   2² =  4
126 /// "is the"     4² = 16
127 /// "question"   2² =  4
128 /// ```
129 ///
130 /// The sum of the penalties is 41, which is better than what the
131 /// greedy algorithm produced.
132 ///
133 /// Searching through all possible combinations would normally be
134 /// prohibitively slow. However, it turns out that the problem can be
135 /// formulated as the task of finding column minima in a cost matrix.
136 /// This matrix has a special form (totally monotone) which lets us
137 /// use a [linear-time algorithm called
138 /// SMAWK](https://lib.rs/crates/smawk) to find the optimal break
139 /// points.
140 ///
141 /// This means that the time complexity remains O(_n_) where _n_ is
142 /// the number of words. Compared to
143 /// [`wrap_first_fit`](super::wrap_first_fit), this function is about
144 /// 4 times slower.
145 ///
146 /// The optimization of per-line costs over the entire paragraph is
147 /// inspired by the line breaking algorithm used in TeX, as described
148 /// in the 1981 article [_Breaking Paragraphs into
149 /// Lines_](http://www.eprg.org/G53DOC/pdfs/knuth-plass-breaking.pdf)
150 /// by Knuth and Plass. The implementation here is based on [Python
151 /// code by David
152 /// Eppstein](https://github.com/jfinkels/PADS/blob/master/pads/wrap.py).
153 ///
154 /// **Note:** Only available when the `smawk` Cargo feature is
155 /// enabled.
wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>( fragments: &'a [T], line_widths: F, ) -> Vec<&'a [T]>156 pub fn wrap_optimal_fit<'a, T: Fragment, F: Fn(usize) -> usize>(
157     fragments: &'a [T],
158     line_widths: F,
159 ) -> Vec<&'a [T]> {
160     let mut widths = Vec::with_capacity(fragments.len() + 1);
161     let mut width = 0;
162     widths.push(width);
163     for fragment in fragments {
164         width += fragment.width() + fragment.whitespace_width();
165         widths.push(width);
166     }
167 
168     let line_numbers = LineNumbers::new(fragments.len());
169 
170     let minima = smawk::online_column_minima(0, widths.len(), |minima, i, j| {
171         // Line number for fragment `i`.
172         let line_number = line_numbers.get(i, &minima);
173         let target_width = std::cmp::max(1, line_widths(line_number));
174 
175         // Compute the width of a line spanning fragments[i..j] in
176         // constant time. We need to adjust widths[j] by subtracting
177         // the whitespace of fragment[j-i] and then add the penalty.
178         let line_width = widths[j] - widths[i] - fragments[j - 1].whitespace_width()
179             + fragments[j - 1].penalty_width();
180 
181         // We compute cost of the line containing fragments[i..j]. We
182         // start with values[i].1, which is the optimal cost for
183         // breaking before fragments[i].
184         //
185         // First, every extra line cost NLINE_PENALTY.
186         let mut cost = minima[i].1 + NLINE_PENALTY;
187 
188         // Next, we add a penalty depending on the line length.
189         if line_width > target_width {
190             // Lines that overflow get a hefty penalty.
191             let overflow = (line_width - target_width) as i32;
192             cost += overflow * OVERFLOW_PENALTY;
193         } else if j < fragments.len() {
194             // Other lines (except for the last line) get a milder
195             // penalty which depend on the size of the gap.
196             let gap = (target_width - line_width) as i32;
197             cost += gap * gap;
198         } else if i + 1 == j && line_width < target_width / SHORT_LINE_FRACTION {
199             // The last line can have any size gap, but we do add a
200             // penalty if the line is very short (typically because it
201             // contains just a single word).
202             cost += SHORT_LAST_LINE_PENALTY;
203         }
204 
205         // Finally, we discourage hyphens.
206         if fragments[j - 1].penalty_width() > 0 {
207             // TODO: this should use a penalty value from the fragment
208             // instead.
209             cost += HYPHEN_PENALTY;
210         }
211 
212         cost
213     });
214 
215     let mut lines = Vec::with_capacity(line_numbers.get(fragments.len(), &minima));
216     let mut pos = fragments.len();
217     loop {
218         let prev = minima[pos].0;
219         lines.push(&fragments[prev..pos]);
220         pos = prev;
221         if pos == 0 {
222             break;
223         }
224     }
225 
226     lines.reverse();
227     lines
228 }
229