/// Splits `line` into words, breaking only on the ASCII space character
/// (`' '`, U+0020).
///
/// Every returned slice borrows from `line`, and concatenating all of them
/// reproduces `line` exactly:
///
/// * a word carries the run of spaces that follows it (`"foo  bar"` yields
///   `"foo  "` and `"bar"`);
/// * spaces at the very start of the line form a word of their own
///   (`" foo"` yields `" "` and `"foo"`);
/// * no other character is treated as a separator, so tabs, newlines and
///   non-breaking spaces stay inside the surrounding word.
///
/// An empty `line` yields no words.
pub(crate) fn find_words_ascii_space(line: &str) -> impl Iterator<Item = &str> + '_ {
    // Byte offset at which the word currently being scanned begins.
    let mut start = 0;
    // True while the most recently seen character was a space.
    let mut in_whitespace = false;
    let mut char_indices = line.char_indices();

    std::iter::from_fn(move || {
        // `by_ref` keeps the underlying iterator alive across calls, so each
        // call resumes scanning right after the character that ended the
        // previous word.
        for (idx, ch) in char_indices.by_ref() {
            if in_whitespace && ch != ' ' {
                // First non-space character after a run of spaces: the word
                // (including its trailing spaces) ends just before `idx`.
                let word = &line[start..idx];
                start = idx;
                in_whitespace = false;
                return Some(word);
            }
            in_whitespace = ch == ' ';
        }

        // The characters are exhausted; emit whatever is left exactly once.
        if start < line.len() {
            let word = &line[start..];
            start = line.len();
            return Some(word);
        }

        None
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Generates one `#[test]` function named `$ascii_name` that checks, for
    /// every `[line, expected_words]` pair, that splitting `line` with
    /// `find_words_ascii_space` produces exactly `expected_words`.
    macro_rules! test_find_words {
        ($ascii_name:ident, $([ $line:expr, $ascii_words:expr ]),+) => {
            #[test]
            fn $ascii_name() {
                $(
                    let expected_words: Vec<&str> = $ascii_words.to_vec();
                    let actual_words = find_words_ascii_space($line)
                        .collect::<Vec<_>>();
                    assert_eq!(actual_words, expected_words, "Line: {:?}", $line);
                )+
            }
        };
    }

    test_find_words!(ascii_space_empty, ["", []]);

    test_find_words!(ascii_single_word, ["foo", ["foo"]]);

    test_find_words!(ascii_two_words, ["foo bar", ["foo ", "bar"]]);

    test_find_words!(
        ascii_multiple_words,
        ["foo bar", ["foo ", "bar"]],
        ["x y z", ["x ", "y ", "z"]]
    );

    test_find_words!(ascii_only_whitespace, [" ", [" "]], ["    ", ["    "]]);

    test_find_words!(
        ascii_inter_word_whitespace,
        ["foo bar", ["foo ", "bar"]],
        ["foo   bar", ["foo   ", "bar"]]
    );

    test_find_words!(
        ascii_trailing_whitespace,
        ["foo ", ["foo "]],
        ["foo   ", ["foo   "]]
    );

    test_find_words!(
        ascii_leading_whitespace,
        [" foo", [" ", "foo"]],
        ["   foo", ["   ", "foo"]]
    );

    test_find_words!(
        ascii_multi_column_char,
        ["\u{1f920}", ["\u{1f920}"]] // cowboy emoji 🤠
    );

    test_find_words!(
        ascii_hyphens,
        ["foo-bar", ["foo-bar"]],
        ["foo- bar", ["foo- ", "bar"]],
        ["foo - bar", ["foo ", "- ", "bar"]],
        ["foo -bar", ["foo ", "-bar"]]
    );

    // Only U+0020 splits words; other whitespace stays inside the word.
    test_find_words!(ascii_newline, ["foo\nbar", ["foo\nbar"]]);

    test_find_words!(ascii_tab, ["foo\tbar", ["foo\tbar"]]);

    test_find_words!(
        ascii_non_breaking_space,
        ["foo\u{00A0}bar", ["foo\u{00A0}bar"]]
    );
}