textwrap/src/wrap_algorithms.rs

//! Word wrapping algorithms.
//!
//! After a text has been broken into words (or [`Fragment`]s), one
//! now has to decide how to break the fragments into lines. The
//! simplest algorithm for this is implemented by [`wrap_first_fit`]:
//! it uses no look-ahead and simply adds fragments to the line as
//! long as they fit. However, this can lead to poor line breaks if a
//! large fragment almost-but-not-quite fits on a line. When that
//! happens, the fragment is moved to the next line and it will leave
//! behind a large gap. A more advanced algorithm, implemented by
//! [`wrap_optimal_fit`], will take this into account. The optimal-fit
//! algorithm considers all possible line breaks and will attempt to
//! minimize the gaps left behind by overly short lines.
//!
//! While both algorithms run in linear time, the first-fit algorithm
//! is about 4 times faster than the optimal-fit algorithm.

#[cfg(feature = "smawk")]
mod optimal_fit;
#[cfg(feature = "smawk")]
pub use optimal_fit::{wrap_optimal_fit, OverflowError, Penalties};

use crate::core::{Fragment, Word};

/// Describes how to wrap words into lines.
///
/// The simplest approach is to wrap words one word at a time and
/// accept the first way of wrapping which fit
/// ([`WrapAlgorithm::FirstFit`]). If the `smawk` Cargo feature is
/// enabled, a more complex algorithm is available which will look at
/// an entire paragraph at a time in order to find optimal line breaks
/// ([`WrapAlgorithm::OptimalFit`]).
#[derive(Clone, Copy)]
pub enum WrapAlgorithm {
    /// Wrap words using a fast and simple algorithm.
    ///
    /// This algorithm uses no look-ahead when finding line breaks.
    /// Implemented by [`wrap_first_fit`], please see that function for
    /// details and examples.
    FirstFit,

    /// Wrap words using an advanced algorithm with look-ahead.
    ///
    /// This wrapping algorithm considers the entire paragraph to find
    /// optimal line breaks. When wrapping text, "penalties" are
    /// assigned to line breaks based on the gaps left at the end of
    /// lines. See [`Penalties`] for details.
    ///
    /// The underlying wrapping algorithm is implemented by
    /// [`wrap_optimal_fit`], please see that function for examples.
    ///
    /// **Note:** Only available when the `smawk` Cargo feature is
    /// enabled.
    #[cfg(feature = "smawk")]
    OptimalFit(Penalties),

    /// Custom wrapping function.
    ///
    /// Use this if you want to implement your own wrapping algorithm.
    /// The function can freely decide how to turn a slice of
    /// [`Word`]s into lines.
    ///
    /// # Example
    ///
    /// ```
    /// use textwrap::core::Word;
    /// use textwrap::{wrap, Options, WrapAlgorithm};
    ///
    /// fn stair<'a, 'b>(words: &'b [Word<'a>], _: &'b [usize]) -> Vec<&'b [Word<'a>]> {
    ///     let mut lines = Vec::new();
    ///     let mut step = 1;
    ///     let mut start_idx = 0;
    ///     while start_idx + step <= words.len() {
    ///       lines.push(&words[start_idx .. start_idx+step]);
    ///       start_idx += step;
    ///       step += 1;
    ///     }
    ///     lines
    /// }
    ///
    /// let options = Options::new(10).wrap_algorithm(WrapAlgorithm::Custom(stair));
    /// assert_eq!(wrap("First, second, third, fourth, fifth, sixth", options),
    ///            vec!["First,",
    ///                 "second, third,",
    ///                 "fourth, fifth, sixth"]);
    /// ```
    Custom(for<'a, 'b> fn(words: &'b [Word<'a>], line_widths: &'b [usize]) -> Vec<&'b [Word<'a>]>),
}

impl std::fmt::Debug for WrapAlgorithm {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            WrapAlgorithm::FirstFit => f.write_str("FirstFit"),
            #[cfg(feature = "smawk")]
            WrapAlgorithm::OptimalFit(penalties) => write!(f, "OptimalFit({:?})", penalties),
            WrapAlgorithm::Custom(_) => f.write_str("Custom(...)"),
        }
    }
}

impl WrapAlgorithm {
    /// Create new wrap algorithm.
    ///
    /// The best wrapping algorithm is used by default, i.e.,
    /// [`WrapAlgorithm::OptimalFit`] if available, otherwise
    /// [`WrapAlgorithm::FirstFit`].
    pub const fn new() -> Self {
        #[cfg(not(feature = "smawk"))]
        {
            WrapAlgorithm::FirstFit
        }

        #[cfg(feature = "smawk")]
        {
            WrapAlgorithm::new_optimal_fit()
        }
    }

    /// New [`WrapAlgorithm::OptimalFit`] with default penalties. This
    /// works well for monospace text.
    ///
    /// **Note:** Only available when the `smawk` Cargo feature is
    /// enabled.
    #[cfg(feature = "smawk")]
    pub const fn new_optimal_fit() -> Self {
        WrapAlgorithm::OptimalFit(Penalties::new())
    }

    /// Wrap words according to line widths.
    ///
    /// The `line_widths` slice gives the target line width for each
    /// line (the last slice element is repeated as necessary). This
    /// can be used to implement hanging indentation.
    #[inline]
    pub fn wrap<'a, 'b>(
        &self,
        words: &'b [Word<'a>],
        line_widths: &'b [usize],
    ) -> Vec<&'b [Word<'a>]> {
        // Every integer up to 2u64.pow(f64::MANTISSA_DIGITS) = 2**53
        // = 9_007_199_254_740_992 can be represented without loss by
        // a f64. Larger line widths will be rounded to the nearest
        // representable number.
        let f64_line_widths = line_widths.iter().map(|w| *w as f64).collect::<Vec<_>>();

        match self {
            WrapAlgorithm::FirstFit => wrap_first_fit(words, &f64_line_widths),

            #[cfg(feature = "smawk")]
            WrapAlgorithm::OptimalFit(penalties) => {
                // The computation cannnot overflow when the line
                // widths are restricted to usize.
                wrap_optimal_fit(words, &f64_line_widths, penalties).unwrap()
            }

            WrapAlgorithm::Custom(func) => func(words, line_widths),
        }
    }
}

impl Default for WrapAlgorithm {
    fn default() -> Self {
        WrapAlgorithm::new()
    }
}

/// Wrap abstract fragments into lines with a first-fit algorithm.
///
/// The `line_widths` slice gives the target line width for each line
/// (the last slice element is repeated as necessary). This can be
/// used to implement hanging indentation.
///
/// The fragments must already have been split into the desired
/// widths, this function will not (and cannot) attempt to split them
/// further when arranging them into lines.
///
/// # First-Fit Algorithm
///
/// This implements a simple “greedy” algorithm: accumulate fragments
/// one by one and when a fragment no longer fits, start a new line.
/// There is no look-ahead, we simply take first fit of the fragments
/// we find.
///
/// While fast and predictable, this algorithm can produce poor line
/// breaks when a long fragment is moved to a new line, leaving behind
/// a large gap:
///
/// ```
/// use textwrap::core::Word;
/// use textwrap::wrap_algorithms::wrap_first_fit;
/// use textwrap::WordSeparator;
///
/// // Helper to convert wrapped lines to a Vec<String>.
/// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> {
///     lines.iter().map(|line| {
///         line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ")
///     }).collect::<Vec<_>>()
/// }
///
/// let text = "These few words will unfortunately not wrap nicely.";
/// let words = WordSeparator::AsciiSpace.find_words(text).collect::<Vec<_>>();
/// assert_eq!(lines_to_strings(wrap_first_fit(&words, &[15.0])),
///            vec!["These few words",
///                 "will",  // <-- short line
///                 "unfortunately",
///                 "not wrap",
///                 "nicely."]);
///
/// // We can avoid the short line if we look ahead:
/// #[cfg(feature = "smawk")]
/// use textwrap::wrap_algorithms::{wrap_optimal_fit, Penalties};
/// #[cfg(feature = "smawk")]
/// assert_eq!(lines_to_strings(wrap_optimal_fit(&words, &[15.0], &Penalties::new()).unwrap()),
///            vec!["These few",
///                 "words will",
///                 "unfortunately",
///                 "not wrap",
///                 "nicely."]);
/// ```
///
/// The [`wrap_optimal_fit`] function was used above to get better
/// line breaks. It uses an advanced algorithm which tries to avoid
/// short lines. This function is about 4 times faster than
/// [`wrap_optimal_fit`].
///
/// # Examples
///
/// Imagine you're building a house site and you have a number of
/// tasks you need to execute. Things like pour foundation, complete
/// framing, install plumbing, electric cabling, install insulation.
///
/// The construction workers can only work during daytime, so they
/// need to pack up everything at night. Because they need to secure
/// their tools and move machines back to the garage, this process
/// takes much more time than the time it would take them to simply
/// switch to another task.
///
/// You would like to make a list of tasks to execute every day based
/// on your estimates. You can model this with a program like this:
///
/// ```
/// use textwrap::core::{Fragment, Word};
/// use textwrap::wrap_algorithms::wrap_first_fit;
///
/// #[derive(Debug)]
/// struct Task<'a> {
///     name: &'a str,
///     hours: f64,   // Time needed to complete task.
///     sweep: f64,   // Time needed for a quick sweep after task during the day.
///     cleanup: f64, // Time needed for full cleanup if day ends with this task.
/// }
///
/// impl Fragment for Task<'_> {
///     fn width(&self) -> f64 { self.hours }
///     fn whitespace_width(&self) -> f64 { self.sweep }
///     fn penalty_width(&self) -> f64 { self.cleanup }
/// }
///
/// // The morning tasks
/// let tasks = vec![
///     Task { name: "Foundation",  hours: 4.0, sweep: 2.0, cleanup: 3.0 },
///     Task { name: "Framing",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
///     Task { name: "Plumbing",    hours: 2.0, sweep: 2.0, cleanup: 2.0 },
///     Task { name: "Electrical",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
///     Task { name: "Insulation",  hours: 2.0, sweep: 1.0, cleanup: 2.0 },
///     Task { name: "Drywall",     hours: 3.0, sweep: 1.0, cleanup: 2.0 },
///     Task { name: "Floors",      hours: 3.0, sweep: 1.0, cleanup: 2.0 },
///     Task { name: "Countertops", hours: 1.0, sweep: 1.0, cleanup: 2.0 },
///     Task { name: "Bathrooms",   hours: 2.0, sweep: 1.0, cleanup: 2.0 },
/// ];
///
/// // Fill tasks into days, taking `day_length` into account. The
/// // output shows the hours worked per day along with the names of
/// // the tasks for that day.
/// fn assign_days<'a>(tasks: &[Task<'a>], day_length: f64) -> Vec<(f64, Vec<&'a str>)> {
///     let mut days = Vec::new();
///     // Assign tasks to days. The assignment is a vector of slices,
///     // with a slice per day.
///     let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, &[day_length]);
///     for day in assigned_days.iter() {
///         let last = day.last().unwrap();
///         let work_hours: f64 = day.iter().map(|t| t.hours + t.sweep).sum();
///         let names = day.iter().map(|t| t.name).collect::<Vec<_>>();
///         days.push((work_hours - last.sweep + last.cleanup, names));
///     }
///     days
/// }
///
/// // With a single crew working 8 hours a day:
/// assert_eq!(
///     assign_days(&tasks, 8.0),
///     [
///         (7.0, vec!["Foundation"]),
///         (8.0, vec!["Framing", "Plumbing"]),
///         (7.0, vec!["Electrical", "Insulation"]),
///         (5.0, vec!["Drywall"]),
///         (7.0, vec!["Floors", "Countertops"]),
///         (4.0, vec!["Bathrooms"]),
///     ]
/// );
///
/// // With two crews working in shifts, 16 hours a day:
/// assert_eq!(
///     assign_days(&tasks, 16.0),
///     [
///         (14.0, vec!["Foundation", "Framing", "Plumbing"]),
///         (15.0, vec!["Electrical", "Insulation", "Drywall", "Floors"]),
///         (6.0, vec!["Countertops", "Bathrooms"]),
///     ]
/// );
/// ```
///
/// Apologies to anyone who actually knows how to build a house and
/// knows how long each step takes :-)
pub fn wrap_first_fit<'a, 'b, T: Fragment>(
    fragments: &'a [T],
    line_widths: &'b [f64],
) -> Vec<&'a [T]> {
    // The final line width is used for all remaining lines.
    let default_line_width = line_widths.last().copied().unwrap_or(0.0);
    let mut lines = Vec::new();
    let mut start = 0;
    let mut width = 0.0;

    for (idx, fragment) in fragments.iter().enumerate() {
        let line_width = line_widths
            .get(lines.len())
            .copied()
            .unwrap_or(default_line_width);
        if width + fragment.width() + fragment.penalty_width() > line_width && idx > start {
            lines.push(&fragments[start..idx]);
            start = idx;
            width = 0.0;
        }
        width += fragment.width() + fragment.whitespace_width();
    }
    lines.push(&fragments[start..]);
    lines
}

#[cfg(test)]
mod tests {
    use super::*;

    #[derive(Debug, PartialEq)]
    struct Word(f64);

    #[rustfmt::skip]
    impl Fragment for Word {
        fn width(&self) -> f64 { self.0 }
        fn whitespace_width(&self) -> f64 { 1.0 }
        fn penalty_width(&self) -> f64 { 0.0 }
    }

    #[test]
    fn wrap_string_longer_than_f64() {
        let words = vec![
            Word(1e307),
            Word(2e307),
            Word(3e307),
            Word(4e307),
            Word(5e307),
            Word(6e307),
        ];
        // Wrap at just under f64::MAX (~19e307). The tiny
        // whitespace_widths disappear because of loss of precision.
        assert_eq!(
            wrap_first_fit(&words, &[15e307]),
            &[
                vec![
                    Word(1e307),
                    Word(2e307),
                    Word(3e307),
                    Word(4e307),
                    Word(5e307)
                ],
                vec![Word(6e307)]
            ]
        );
    }
}