Lines Matching +full:grapheme +full:- +full:splitter
28 /// A short-hand constructor for building a `&[u8]`.
74 pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] { in B()
80 fn as_bytes(&self) -> &[u8] { in as_bytes()
85 fn as_bytes_mut(&mut self) -> &mut [u8] { in as_bytes_mut()
92 fn as_bytes(&self) -> &[u8] { in as_bytes()
97 fn as_bytes_mut(&mut self) -> &mut [u8] { in as_bytes_mut()
115 /// no-op and callers shouldn't care about it. This only exists for making
118 fn as_bytes(&self) -> &[u8]; in as_bytes()
121 /// always a no-op and callers shouldn't care about it. This only exists
124 fn as_bytes_mut(&mut self) -> &mut [u8]; in as_bytes_mut()
131 /// shows its bytes as a normal string. For invalid UTF-8, hex escape
144 fn as_bstr(&self) -> &BStr { in as_bstr()
153 /// shows its bytes as a normal string. For invalid UTF-8, hex escape
167 fn as_bstr_mut(&mut self) -> &mut BStr { in as_bstr_mut()
175 /// given OS string is not valid UTF-8. (For example, when the underlying
177 /// sequence of arbitrary 16-bit integers. Not all such sequences can be
178 /// transcoded to valid UTF-8.)
190 /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
195 fn from_os_str(os_str: &OsStr) -> Option<&[u8]> { in from_os_str()
198 fn imp(os_str: &OsStr) -> Option<&[u8]> { in from_os_str()
206 fn imp(os_str: &OsStr) -> Option<&[u8]> { in from_os_str()
217 /// path is not valid UTF-8. (For example, when the underlying bytes are
219 /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
220 /// valid UTF-8.)
232 /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
237 fn from_path(path: &Path) -> Option<&[u8]> { in from_path()
241 /// Safely convert this byte string into a `&str` if it's valid UTF-8.
243 /// If this byte string is not valid UTF-8, then an error is returned. The
260 /// # fn example() -> Result<(), bstr::Utf8Error> {
272 fn to_str(&self) -> Result<&str, Utf8Error> { in to_str()
281 /// valid UTF-8.
285 /// Callers *must* ensure that this byte string is valid UTF-8 before
287 /// not valid UTF-8 is considered undefined behavior.
290 /// UTF-8 validity of the byte string is already known and it is
291 /// undesirable to pay the cost of an additional UTF-8 validation check
302 /// // valid UTF-8 by the Rust compiler.
307 unsafe fn to_str_unchecked(&self) -> &str { in to_str_unchecked()
311 /// Convert this byte string to a valid UTF-8 string by replacing invalid
312 /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
314 /// If the byte string is already valid UTF-8, then no copying or
316 /// the byte string is not valid UTF-8, then an owned string buffer is
323 /// If there were previous bytes that represented a prefix of a well-formed
330 /// [Public Review Issue #121](https://www.unicode.org/review/pr-121.html).
358 /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
359 /// // \xE1\x80 is a valid 2-byte code unit prefix.
360 /// // \xC2 is a valid 1-byte code unit prefix.
372 fn to_str_lossy(&self) -> Cow<'_, str> { in to_str_lossy()
386 // that all of `valid` is valid UTF-8. in to_str_lossy()
398 /// buffer, while replacing invalid UTF-8 code unit sequences with the
408 /// valid UTF-8.
436 // that all of `bytes` is valid UTF-8. in to_str_lossy_into()
443 // that all of `valid` is valid UTF-8. in to_str_lossy_into()
458 /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
459 /// decoding error if this byte string is not valid UTF-8. (For example,
461 /// are allowed to be a sequence of arbitrary 16-bit integers. There is
462 /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
463 /// arbitrary sequence of 16-bit integers. If the representation of `OsStr`
474 /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
479 fn to_os_str(&self) -> Result<&OsStr, Utf8Error> { in to_os_str()
482 fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> { in to_os_str()
490 fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> { in to_os_str()
501 /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
519 fn to_os_str_lossy(&self) -> Cow<'_, OsStr> { in to_os_str_lossy()
522 fn imp(bytes: &[u8]) -> Cow<'_, OsStr> { in to_os_str_lossy()
530 fn imp(bytes: &[u8]) -> Cow<OsStr> { in to_os_str_lossy()
545 /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
546 /// decoding error if this byte string is not valid UTF-8. (For example,
548 /// are allowed to be a sequence of arbitrary 16-bit integers. There is
549 /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
550 /// arbitrary sequence of 16-bit integers. If the representation of `Path`
561 /// let path = b"foo".to_path().expect("should be valid UTF-8");
566 fn to_path(&self) -> Result<&Path, Utf8Error> { in to_path()
574 /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
593 fn to_path_lossy(&self) -> Cow<'_, Path> { in to_path_lossy()
621 fn repeatn(&self, n: usize) -> Vec<u8> { in repeatn()
639 fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool { in contains_str()
657 fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool { in starts_with_str()
675 fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool { in ends_with_str()
710 fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> { in find()
747 fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> { in rfind()
751 /// Returns an iterator of the non-overlapping occurrences of the given
792 ) -> Find<'h, 'n> { in find_iter()
796 /// Returns an iterator of the non-overlapping occurrences of the given
837 ) -> FindReverse<'h, 'n> { in rfind_iter()
855 fn find_byte(&self, byte: u8) -> Option<usize> { in find_byte()
873 fn rfind_byte(&self, byte: u8) -> Option<usize> { in rfind_byte()
883 /// UTF-8 sequences will not be matched.
897 fn find_char(&self, ch: char) -> Option<usize> { in find_char()
907 /// UTF-8 sequences will not be matched.
921 fn rfind_char(&self, ch: char) -> Option<usize> { in rfind_char()
961 fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { in find_byteset()
1002 fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { in find_not_byteset()
1039 fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { in rfind_byteset()
1076 fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { in rfind_not_byteset()
1105 fn fields(&self) -> Fields<'_> { in fields()
1112 /// If this byte string is not valid UTF-8, then the given closure will
1113 /// be called with a Unicode replacement codepoint when invalid UTF-8
1137 fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> { in fields_with()
1143 /// include the splitter substring.
1145 /// The splitter may be any type that can be cheaply converted into a
1207 /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1208 /// // may not be valid UTF-8!
1232 splitter: &'s B, in split_str()
1233 ) -> Split<'h, 's> { in split_str()
1234 Split::new(self.as_bytes(), splitter.as_ref()) in split_str()
1239 /// not to include the splitter substring.
1241 /// The splitter may be any type that can be cheaply converted into a
1304 /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1305 /// // may not be valid UTF-8!
1326 splitter: &'s B, in rsplit_str()
1327 ) -> SplitReverse<'h, 's> { in rsplit_str()
1328 SplitReverse::new(self.as_bytes(), splitter.as_ref()) in rsplit_str()
1331 /// Split this byte string at the first occurrence of `splitter`.
1333 /// If the `splitter` is found in the byte string, returns a tuple
1335 /// of `splitter` respectively. Otherwise, if there are no occurrences of
1336 /// `splitter` in the byte string, returns `None`.
1338 /// The splitter may be any type that can be cheaply converted into a
1366 splitter: &B, in split_once_str()
1367 ) -> Option<(&'a [u8], &'a [u8])> { in split_once_str()
1369 let splitter = splitter.as_ref(); in split_once_str() localVariable
1370 let start = Finder::new(splitter).find(bytes)?; in split_once_str()
1371 let end = start + splitter.len(); in split_once_str()
1375 /// Split this byte string at the last occurrence of `splitter`.
1377 /// If the `splitter` is found in the byte string, returns a tuple
1379 /// of `splitter`, respectively. Otherwise, if there are no occurrences of
1380 /// `splitter` in the byte string, returns `None`.
1382 /// The splitter may be any type that can be cheaply converted into a
1410 splitter: &B, in rsplit_once_str()
1411 ) -> Option<(&'a [u8], &'a [u8])> { in rsplit_once_str()
1413 let splitter = splitter.as_ref(); in rsplit_once_str() localVariable
1414 let start = FinderReverse::new(splitter).rfind(bytes)?; in rsplit_once_str()
1415 let end = start + splitter.len(); in rsplit_once_str()
1458 splitter: &'s B, in splitn_str()
1459 ) -> SplitN<'h, 's> { in splitn_str()
1460 SplitN::new(self.as_bytes(), splitter.as_ref(), limit) in splitn_str()
1504 splitter: &'s B, in rsplitn_str()
1505 ) -> SplitNReverse<'h, 's> { in rsplitn_str()
1506 SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit) in rsplitn_str()
1549 ) -> Vec<u8> { in replace()
1596 ) -> Vec<u8> { in replacen()
1745 fn bytes(&self) -> Bytes<'_> { in bytes()
1750 /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1775 fn chars(&self) -> Chars<'_> { in chars()
1781 /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1786 /// invalid UTF-8, this iterator provides both the corresponding starting
1788 /// is necessary to slice the original byte string when invalid UTF-8 bytes
1830 fn char_indices(&self) -> CharIndices<'_> { in char_indices()
1834 /// Iterate over chunks of valid UTF-8.
1836 /// The iterator returned yields chunks of valid UTF-8 separated by invalid
1837 /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
1867 fn utf8_chunks(&self) -> Utf8Chunks<'_> { in utf8_chunks()
1871 /// Returns an iterator over the grapheme clusters in this byte string.
1872 /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1878 /// single grapheme cluster:
1899 fn graphemes(&self) -> Graphemes<'_> { in graphemes()
1903 /// Returns an iterator over the grapheme clusters in this byte string
1905 /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1911 /// grapheme cluster:
1922 /// This example shows what happens when invalid UTF-8 is encountered. Note
1945 fn grapheme_indices(&self) -> GraphemeIndices<'_> { in grapheme_indices()
1950 /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1962 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1963 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1981 fn words(&self) -> Words<'_> { in words()
1997 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1998 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2019 fn word_indices(&self) -> WordIndices<'_> { in word_indices()
2026 /// codepoint substitutions if invalid UTF-8 is encountered).
2029 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2030 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2049 fn words_with_breaks(&self) -> WordsWithBreaks<'_> { in words_with_breaks()
2056 /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
2060 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2061 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2086 fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> { in words_with_break_indices()
2095 /// substitutions if invalid UTF-8 is encountered).
2098 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2099 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2118 fn sentences(&self) -> Sentences<'_> { in sentences()
2128 /// substitutions if invalid UTF-8 is encountered).
2131 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2132 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2152 fn sentence_indices(&self) -> SentenceIndices<'_> { in sentence_indices()
2183 fn lines(&self) -> Lines<'_> { in lines()
2227 fn lines_with_terminator(&self) -> LinesWithTerminator<'_> { in lines_with_terminator()
2249 fn trim(&self) -> &[u8] { in trim()
2270 fn trim_start(&self) -> &[u8] { in trim_start()
2292 fn trim_end(&self) -> &[u8] { in trim_end()
2311 fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] { in trim_with()
2329 fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] { in trim_start_with()
2352 fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] { in trim_end_with()
2367 /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2398 /// Invalid UTF-8 remains as is:
2408 fn to_lowercase(&self) -> Vec<u8> { in to_lowercase()
2420 /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2457 /// Invalid UTF-8 remains as is:
2496 /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2515 /// Invalid UTF-8 remains as is:
2525 fn to_ascii_lowercase(&self) -> Vec<u8> { in to_ascii_lowercase()
2532 /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2550 /// Invalid UTF-8 remains as is:
2572 /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2603 /// Invalid UTF-8 remains as is:
2613 fn to_uppercase(&self) -> Vec<u8> { in to_uppercase()
2625 /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2662 /// Invalid UTF-8 remains as is:
2698 /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2717 /// Invalid UTF-8 remains as is:
2727 fn to_ascii_uppercase(&self) -> Vec<u8> { in to_ascii_uppercase()
2734 /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2752 /// Invalid UTF-8 remains as is:
2771 /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
2772 /// string will likely result in invalid UTF-8 and otherwise non-sensical
2797 /// If this byte string is valid UTF-8, then its reversal by codepoint
2798 /// is also guaranteed to be valid UTF-8.
2815 /// if this byte string contains grapheme clusters with more than one
2816 /// codepoint, then those grapheme clusters will not necessarily be
2817 /// preserved. If you'd like to preserve grapheme clusters, then use
2879 /// If this byte string is valid UTF-8, then its reversal by grapheme
2880 /// is also guaranteed to be valid UTF-8.
2908 /// This example shows how this correctly handles grapheme clusters,
2955 fn is_ascii(&self) -> bool { in is_ascii()
2959 /// Returns true if and only if the entire byte string is valid UTF-8.
2962 /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
2983 fn is_utf8(&self) -> bool { in is_utf8()
2987 /// Returns the last byte in this byte string, if it's non-empty. If this
3004 fn last_byte(&self) -> Option<u8> { in last_byte()
3009 /// Returns the index of the first non-ASCII byte in this byte string (if
3025 fn find_non_ascii_byte(&self) -> Option<usize> { in find_non_ascii_byte()
3040 /// concern when it's necessary to re-use the same needle to search multiple
3057 pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> { in new()
3064 /// If this is already an owned finder, then this is a no-op. Otherwise,
3070 pub fn into_owned(self) -> Finder<'static> { in into_owned()
3081 pub fn needle(&self) -> &[u8] { in needle()
3113 pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { in find()
3123 /// concern when it's necessary to re-use the same needle to search multiple
3140 pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> { in new()
3147 /// If this is already an owned finder, then this is a no-op. Otherwise,
3153 pub fn into_owned(self) -> FinderReverse<'static> { in into_owned()
3164 pub fn needle(&self) -> &[u8] { in needle()
3196 pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { in rfind()
3201 /// An iterator over non-overlapping substring matches.
3215 fn new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n> { in new()
3224 fn next(&mut self) -> Option<usize> { in next()
3229 /// An iterator over non-overlapping substring matches in reverse.
3243 fn new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n> { in new()
3251 fn haystack(&self) -> &'h [u8] { in haystack()
3255 fn needle(&self) -> &'n [u8] { in needle()
3264 fn next(&mut self) -> Option<usize> { in next()
3282 pub fn as_bytes(&self) -> &'a [u8] { in as_bytes()
3291 fn next(&mut self) -> Option<u8> { in next()
3296 fn size_hint(&self) -> (usize, Option<usize>) { in size_hint()
3303 fn next_back(&mut self) -> Option<u8> { in next_back()
3310 fn len(&self) -> usize { in len()
3329 it: FieldsWith<'a, fn(char) -> bool>,
3334 fn new(bytes: &'a [u8]) -> Fields<'a> { in new()
3344 fn next(&mut self) -> Option<&'a [u8]> { in next()
3357 /// of the predicate, i.e., `FnMut(char) -> bool`.
3365 impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
3366 fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> { in new()
3371 impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
3375 fn next(&mut self) -> Option<&'a [u8]> { in next()
3406 /// The end position of the previous match of our splitter. The element
3408 /// beginning of the next match of the splitter.
3411 /// splitter is matched at the end of the haystack. At that point, we still
3417 fn new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's> { in new()
3418 let finder = haystack.find_iter(splitter); in new()
3427 fn next(&mut self) -> Option<&'h [u8]> { in next()
3462 /// The end position of the previous match of our splitter. The element
3464 /// beginning of the next match of the splitter.
3467 /// splitter is matched at the end of the haystack. At that point, we still
3473 fn new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's> { in new()
3474 let finder = haystack.rfind_iter(splitter); in new()
3483 fn next(&mut self) -> Option<&'h [u8]> { in next()
3526 splitter: &'s [u8], in new()
3528 ) -> SplitN<'h, 's> { in new()
3529 let split = haystack.split_str(splitter); in new()
3538 fn next(&mut self) -> Option<&'h [u8]> { in next()
3565 splitter: &'s [u8], in new()
3567 ) -> SplitNReverse<'h, 's> { in new()
3568 let split = haystack.rsplit_str(splitter); in new()
3577 fn next(&mut self) -> Option<&'h [u8]> { in next()
3601 fn new(bytes: &'a [u8]) -> Lines<'a> { in new()
3623 pub fn as_bytes(&self) -> &'a [u8] { in as_bytes()
3632 fn next(&mut self) -> Option<&'a [u8]> { in next()
3639 fn next_back(&mut self) -> Option<Self::Item> { in next_back()
3665 fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> { in new()
3687 pub fn as_bytes(&self) -> &'a [u8] { in as_bytes()
3696 fn next(&mut self) -> Option<&'a [u8]> { in next()
3715 fn next_back(&mut self) -> Option<Self::Item> { in next_back()
3734 fn trim_last_terminator(mut s: &[u8]) -> &[u8] { in trim_last_terminator()
3736 s = &s[..s.len() - 1]; in trim_last_terminator()
3738 s = &s[..s.len() - 1]; in trim_last_terminator()