//! Word splitting functionality.
//!
//! To wrap text into lines, long words sometimes need to be split
//! across lines. The [`WordSplitter`] trait defines this
//! functionality. [`HyphenSplitter`] is the default implementation of
//! this trait: it will simply split words on existing hyphens.

/// The `WordSplitter` trait describes where words can be split.
///
/// If the textwrap crate has been compiled with the `hyphenation`
/// Cargo feature enabled, you will find an implementation of
/// `WordSplitter` by the `hyphenation::Standard` struct. Use this
/// struct for language-aware hyphenation:
///
/// ```
/// #[cfg(feature = "hyphenation")]
/// {
///     use hyphenation::{Language, Load, Standard};
///     use textwrap::{wrap, Options};
///
///     let text = "Oxidation is the loss of electrons.";
///     let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
///     let options = Options::new(8).splitter(dictionary);
///     assert_eq!(wrap(text, &options), vec!["Oxida-",
///                                            "tion is",
///                                            "the loss",
///                                            "of elec-",
///                                            "trons."]);
/// }
/// ```
///
/// Please see the documentation for the [hyphenation] crate for more
/// details.
///
/// [hyphenation]: https://docs.rs/hyphenation/
pub trait WordSplitter: std::fmt::Debug {
    /// Return all possible indices where `word` can be split.
    ///
    /// The indices returned must be in range `0..word.len()`. They
    /// should point to the index _after_ the split point, i.e., after
    /// `-` if splitting on hyphens. This way, `word.split_at(idx)`
    /// will break the word into two well-formed pieces.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::{HyphenSplitter, NoHyphenation, WordSplitter};
    /// assert_eq!(NoHyphenation.split_points("cannot-be-split"), vec![]);
    /// assert_eq!(HyphenSplitter.split_points("can-be-split"), vec![4, 7]);
    /// ```
    fn split_points(&self, word: &str) -> Vec<usize>;
}

/// A boxed splitter (including `Box<dyn WordSplitter>`) forwards to
/// the splitter it owns.
impl<S: WordSplitter + ?Sized> WordSplitter for Box<S> {
    fn split_points(&self, word: &str) -> Vec<usize> {
        // `self` is `&Box<S>`: peel off the reference and the box so
        // the call resolves to the inner splitter's implementation.
        (**self).split_points(word)
    }
}

/// A shared reference to a splitter forwards to the referenced
/// splitter.
impl<T: ?Sized + WordSplitter> WordSplitter for &T {
    fn split_points(&self, word: &str) -> Vec<usize> {
        // `self` is `&&T`: dereference twice so the call resolves to
        // `T`'s implementation rather than to this one.
        (**self).split_points(word)
    }
}

/// Use this as a [`Options.splitter`] to avoid any kind of
/// hyphenation:
///
/// ```
/// use textwrap::{wrap, NoHyphenation, Options};
///
/// let options = Options::new(8).splitter(NoHyphenation);
/// assert_eq!(wrap("foo bar-baz", &options),
///            vec!["foo", "bar-baz"]);
/// ```
///
/// [`Options.splitter`]: super::Options::splitter
#[derive(Clone, Copy, Debug)]
pub struct NoHyphenation;

/// `NoHyphenation` implements `WordSplitter` by not splitting the
/// word at all.
impl WordSplitter for NoHyphenation {
    fn split_points(&self, _: &str) -> Vec<usize> {
        Vec::new()
    }
}

/// Simple and default way to split words: splitting on existing
/// hyphens only.
///
/// You probably don't need to use this type since it's already used
/// by default by [`Options::new`](super::Options::new).
#[derive(Clone, Copy, Debug)]
pub struct HyphenSplitter;

/// `HyphenSplitter` is the default `WordSplitter` used by
/// [`Options::new`](super::Options::new). It will split words on any
/// existing hyphens in the word.
///
/// It will only use hyphens that are surrounded by alphanumeric
/// characters, which prevents a word like `"--foo-bar"` from being
/// split into `"--"` and `"foo-bar"`.
impl WordSplitter for HyphenSplitter {
    fn split_points(&self, word: &str) -> Vec<usize> {
        word.match_indices('-')
            .filter(|&(idx, _)| {
                // We only use hyphens that are surrounded by
                // alphanumeric characters. This is to avoid splitting
                // on repeated hyphens, such as those found in
                // --foo-bar. A hyphen at the very start or end of the
                // word has no neighbor on one side and is skipped too.
                let prev = word[..idx].chars().next_back();
                let next = word[idx + 1..].chars().next();

                prev.map_or(false, char::is_alphanumeric)
                    && next.map_or(false, char::is_alphanumeric)
            })
            .map(|(idx, _)| idx + 1) // +1 due to width of '-'.
            .collect()
    }
}

/// A hyphenation dictionary can be used to do language-specific
/// hyphenation using patterns from the [hyphenation] crate.
///
/// **Note:** Only available when the `hyphenation` Cargo feature is
/// enabled.
///
/// [hyphenation]: https://docs.rs/hyphenation/
#[cfg(feature = "hyphenation")]
impl WordSplitter for hyphenation::Standard {
    fn split_points(&self, word: &str) -> Vec<usize> {
        use hyphenation::Hyphenator;
        self.hyphenate(word).breaks
    }
}