ext_slice.rs - OpenGrok cross reference for /external/rust/crates/bstr/src/ext

Lines Matching +full:grapheme +full:- +full:splitter
28 /// A short-hand constructor for building a `&[u8]`.
74 pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] {  in B()
80     fn as_bytes(&self) -> &[u8] {  in as_bytes()
85     fn as_bytes_mut(&mut self) -> &mut [u8] {  in as_bytes_mut()
92     fn as_bytes(&self) -> &[u8] {  in as_bytes()
97     fn as_bytes_mut(&mut self) -> &mut [u8] {  in as_bytes_mut()
115     /// no-op and callers shouldn't care about it. This only exists for making
118     fn as_bytes(&self) -> &[u8];  in as_bytes()
121     /// always a no-op and callers shouldn't care about it. This only exists
124     fn as_bytes_mut(&mut self) -> &mut [u8];  in as_bytes_mut()
131     /// shows its bytes as a normal string. For invalid UTF-8, hex escape
144     fn as_bstr(&self) -> &BStr {  in as_bstr()
153     /// shows its bytes as a normal string. For invalid UTF-8, hex escape
167     fn as_bstr_mut(&mut self) -> &mut BStr {  in as_bstr_mut()
175     /// given OS string is not valid UTF-8. (For example, when the underlying
177     /// sequence of arbitrary 16-bit integers. Not all such sequences can be
178     /// transcoded to valid UTF-8.)
190     /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
195     fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {  in from_os_str()
198         fn imp(os_str: &OsStr) -> Option<&[u8]> {  in from_os_str()
206         fn imp(os_str: &OsStr) -> Option<&[u8]> {  in from_os_str()
217     /// path is not valid UTF-8. (For example, when the underlying bytes are
219     /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
220     /// valid UTF-8.)
232     /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
237     fn from_path(path: &Path) -> Option<&[u8]> {  in from_path()
241     /// Safely convert this byte string into a `&str` if it's valid UTF-8.
243     /// If this byte string is not valid UTF-8, then an error is returned. The
260     /// # fn example() -> Result<(), bstr::Utf8Error> {
272     fn to_str(&self) -> Result<&str, Utf8Error> {  in to_str()
281     /// valid UTF-8.
285     /// Callers *must* ensure that this byte string is valid UTF-8 before
287     /// not valid UTF-8 is considered undefined behavior.
290     /// UTF-8 validity of the byte string is already known and it is
291     /// undesirable to pay the cost of an additional UTF-8 validation check
302     /// // valid UTF-8 by the Rust compiler.
307     unsafe fn to_str_unchecked(&self) -> &str {  in to_str_unchecked()
311     /// Convert this byte string to a valid UTF-8 string by replacing invalid
312     /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
314     /// If the byte string is already valid UTF-8, then no copying or
316     /// the byte string is not valid UTF-8, then an owned string buffer is
323     /// If there were previous bytes that represented a prefix of a well-formed
330     /// [Public Review Issue #121](https://www.unicode.org/review/pr-121.html).
358     /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
359     /// // \xE1\x80 is a valid 2-byte code unit prefix.
360     /// // \xC2 is a valid 1-byte code unit prefix.
372     fn to_str_lossy(&self) -> Cow<'_, str> {  in to_str_lossy()
386                 // that all of `valid` is valid UTF-8.  in to_str_lossy()
398     /// buffer, while replacing invalid UTF-8 code unit sequences with the
408     /// valid UTF-8.
436                     // that all of `bytes` is valid UTF-8.  in to_str_lossy_into()
443                     // that all of `valid` is valid UTF-8.  in to_str_lossy_into()
458     /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
459     /// decoding error if this byte string is not valid UTF-8. (For example,
461     /// are allowed to be a sequence of arbitrary 16-bit integers. There is
462     /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
463     /// arbitrary sequence of 16-bit integers. If the representation of `OsStr`
474     /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
479     fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {  in to_os_str()
482         fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {  in to_os_str()
490         fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {  in to_os_str()
501     /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
519     fn to_os_str_lossy(&self) -> Cow<'_, OsStr> {  in to_os_str_lossy()
522         fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {  in to_os_str_lossy()
530         fn imp(bytes: &[u8]) -> Cow<OsStr> {  in to_os_str_lossy()
545     /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
546     /// decoding error if this byte string is not valid UTF-8. (For example,
548     /// are allowed to be a sequence of arbitrary 16-bit integers. There is
549     /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
550     /// arbitrary sequence of 16-bit integers. If the representation of `Path`
561     /// let path = b"foo".to_path().expect("should be valid UTF-8");
566     fn to_path(&self) -> Result<&Path, Utf8Error> {  in to_path()
574     /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
593     fn to_path_lossy(&self) -> Cow<'_, Path> {  in to_path_lossy()
621     fn repeatn(&self, n: usize) -> Vec<u8> {  in repeatn()
639     fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {  in contains_str()
657     fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {  in starts_with_str()
675     fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {  in ends_with_str()
710     fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {  in find()
747     fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {  in rfind()
751     /// Returns an iterator of the non-overlapping occurrences of the given
792     ) -> Find<'h, 'n> {  in find_iter()
796     /// Returns an iterator of the non-overlapping occurrences of the given
837     ) -> FindReverse<'h, 'n> {  in rfind_iter()
855     fn find_byte(&self, byte: u8) -> Option<usize> {  in find_byte()
873     fn rfind_byte(&self, byte: u8) -> Option<usize> {  in rfind_byte()
883     /// UTF-8 sequences will not be matched.
897     fn find_char(&self, ch: char) -> Option<usize> {  in find_char()
907     /// UTF-8 sequences will not be matched.
921     fn rfind_char(&self, ch: char) -> Option<usize> {  in rfind_char()
961     fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {  in find_byteset()
1002     fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {  in find_not_byteset()
1039     fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {  in rfind_byteset()
1076     fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {  in rfind_not_byteset()
1105     fn fields(&self) -> Fields<'_> {  in fields()
1112     /// If this byte string is not valid UTF-8, then the given closure will
1113     /// be called with a Unicode replacement codepoint when invalid UTF-8
1137     fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> {  in fields_with()
1143     /// include the splitter substring.
1145     /// The splitter may be any type that can be cheaply converted into a
1207     /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1208     /// // may not be valid UTF-8!
1232         splitter: &'s B,  in split_str()
1233     ) -> Split<'h, 's> {  in split_str()
1234         Split::new(self.as_bytes(), splitter.as_ref())  in split_str()
1239     /// not to include the splitter substring.
1241     /// The splitter may be any type that can be cheaply converted into a
1304     /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1305     /// // may not be valid UTF-8!
1326         splitter: &'s B,  in rsplit_str()
1327     ) -> SplitReverse<'h, 's> {  in rsplit_str()
1328         SplitReverse::new(self.as_bytes(), splitter.as_ref())  in rsplit_str()
1331     /// Split this byte string at the first occurrence of `splitter`.
1333     /// If the `splitter` is found in the byte string, returns a tuple
1335     /// of `splitter` respectively. Otherwise, if there are no occurrences of
1336     /// `splitter` in the byte string, returns `None`.
1338     /// The splitter may be any type that can be cheaply converted into a
1366         splitter: &B,  in split_once_str()
1367     ) -> Option<(&'a [u8], &'a [u8])> {  in split_once_str()
1369         let splitter = splitter.as_ref();  in split_once_str()  localVariable
1370         let start = Finder::new(splitter).find(bytes)?;  in split_once_str()
1371         let end = start + splitter.len();  in split_once_str()
1375     /// Split this byte string at the last occurrence of `splitter`.
1377     /// If the `splitter` is found in the byte string, returns a tuple
1379     /// of `splitter`, respectively. Otherwise, if there are no occurrences of
1380     /// `splitter` in the byte string, returns `None`.
1382     /// The splitter may be any type that can be cheaply converted into a
1410         splitter: &B,  in rsplit_once_str()
1411     ) -> Option<(&'a [u8], &'a [u8])> {  in rsplit_once_str()
1413         let splitter = splitter.as_ref();  in rsplit_once_str()  localVariable
1414         let start = FinderReverse::new(splitter).rfind(bytes)?;  in rsplit_once_str()
1415         let end = start + splitter.len();  in rsplit_once_str()
1458         splitter: &'s B,  in splitn_str()
1459     ) -> SplitN<'h, 's> {  in splitn_str()
1460         SplitN::new(self.as_bytes(), splitter.as_ref(), limit)  in splitn_str()
1504         splitter: &'s B,  in rsplitn_str()
1505     ) -> SplitNReverse<'h, 's> {  in rsplitn_str()
1506         SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)  in rsplitn_str()
1549     ) -> Vec<u8> {  in replace()
1596     ) -> Vec<u8> {  in replacen()
1745     fn bytes(&self) -> Bytes<'_> {  in bytes()
1750     /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1775     fn chars(&self) -> Chars<'_> {  in chars()
1781     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1786     /// invalid UTF-8, this iterator provides both the corresponding starting
1788     /// is necessary to slice the original byte string when invalid UTF-8 bytes
1830     fn char_indices(&self) -> CharIndices<'_> {  in char_indices()
1834     /// Iterate over chunks of valid UTF-8.
1836     /// The iterator returned yields chunks of valid UTF-8 separated by invalid
1837     /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
1867     fn utf8_chunks(&self) -> Utf8Chunks<'_> {  in utf8_chunks()
1871     /// Returns an iterator over the grapheme clusters in this byte string.
1872     /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1878     /// single grapheme cluster:
1899     fn graphemes(&self) -> Graphemes<'_> {  in graphemes()
1903     /// Returns an iterator over the grapheme clusters in this byte string
1905     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1911     /// grapheme cluster:
1922     /// This example shows what happens when invalid UTF-8 is encountered. Note
1945     fn grapheme_indices(&self) -> GraphemeIndices<'_> {  in grapheme_indices()
1950     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1962     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1963     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1981     fn words(&self) -> Words<'_> {  in words()
1997     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1998     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2019     fn word_indices(&self) -> WordIndices<'_> {  in word_indices()
2026     /// codepoint substitutions if invalid UTF-8 is encountered).
2029     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2030     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2049     fn words_with_breaks(&self) -> WordsWithBreaks<'_> {  in words_with_breaks()
2056     /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
2060     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2061     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2086     fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> {  in words_with_break_indices()
2095     /// substitutions if invalid UTF-8 is encountered).
2098     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2099     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2118     fn sentences(&self) -> Sentences<'_> {  in sentences()
2128     /// substitutions if invalid UTF-8 is encountered).
2131     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2132     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2152     fn sentence_indices(&self) -> SentenceIndices<'_> {  in sentence_indices()
2183     fn lines(&self) -> Lines<'_> {  in lines()
2227     fn lines_with_terminator(&self) -> LinesWithTerminator<'_> {  in lines_with_terminator()
2249     fn trim(&self) -> &[u8] {  in trim()
2270     fn trim_start(&self) -> &[u8] {  in trim_start()
2292     fn trim_end(&self) -> &[u8] {  in trim_end()
2311     fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {  in trim_with()
2329     fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {  in trim_start_with()
2352     fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {  in trim_end_with()
2367     /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2398     /// Invalid UTF-8 remains as is:
2408     fn to_lowercase(&self) -> Vec<u8> {  in to_lowercase()
2420     /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2457     /// Invalid UTF-8 remains as is:
2496     /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2515     /// Invalid UTF-8 remains as is:
2525     fn to_ascii_lowercase(&self) -> Vec<u8> {  in to_ascii_lowercase()
2532     /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2550     /// Invalid UTF-8 remains as is:
2572     /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2603     /// Invalid UTF-8 remains as is:
2613     fn to_uppercase(&self) -> Vec<u8> {  in to_uppercase()
2625     /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2662     /// Invalid UTF-8 remains as is:
2698     /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2717     /// Invalid UTF-8 remains as is:
2727     fn to_ascii_uppercase(&self) -> Vec<u8> {  in to_ascii_uppercase()
2734     /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2752     /// Invalid UTF-8 remains as is:
2771     /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
2772     /// string will likely result in invalid UTF-8 and otherwise non-sensical
2797     /// If this byte string is valid UTF-8, then its reversal by codepoint
2798     /// is also guaranteed to be valid UTF-8.
2815     /// if this byte string contains grapheme clusters with more than one
2816     /// codepoint, then those grapheme clusters will not necessarily be
2817     /// preserved. If you'd like to preserve grapheme clusters, then use
2879     /// If this byte string is valid UTF-8, then its reversal by grapheme
2880     /// is also guaranteed to be valid UTF-8.
2908     /// This example shows how this correctly handles grapheme clusters,
2955     fn is_ascii(&self) -> bool {  in is_ascii()
2959     /// Returns true if and only if the entire byte string is valid UTF-8.
2962     /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
2983     fn is_utf8(&self) -> bool {  in is_utf8()
2987     /// Returns the last byte in this byte string, if it's non-empty. If this
3004     fn last_byte(&self) -> Option<u8> {  in last_byte()
3009     /// Returns the index of the first non-ASCII byte in this byte string (if
3025     fn find_non_ascii_byte(&self) -> Option<usize> {  in find_non_ascii_byte()
3040 /// concern when it's necessary to re-use the same needle to search multiple
3057     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {  in new()
3064     /// If this is already an owned finder, then this is a no-op. Otherwise,
3070     pub fn into_owned(self) -> Finder<'static> {  in into_owned()
3081     pub fn needle(&self) -> &[u8] {  in needle()
3113     pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {  in find()
3123 /// concern when it's necessary to re-use the same needle to search multiple
3140     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {  in new()
3147     /// If this is already an owned finder, then this is a no-op. Otherwise,
3153     pub fn into_owned(self) -> FinderReverse<'static> {  in into_owned()
3164     pub fn needle(&self) -> &[u8] {  in needle()
3196     pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {  in rfind()
3201 /// An iterator over non-overlapping substring matches.
3215     fn new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n> {  in new()
3224     fn next(&mut self) -> Option<usize> {  in next()
3229 /// An iterator over non-overlapping substring matches in reverse.
3243     fn new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n> {  in new()
3251     fn haystack(&self) -> &'h [u8] {  in haystack()
3255     fn needle(&self) -> &'n [u8] {  in needle()
3264     fn next(&mut self) -> Option<usize> {  in next()
3282     pub fn as_bytes(&self) -> &'a [u8] {  in as_bytes()
3291     fn next(&mut self) -> Option<u8> {  in next()
3296     fn size_hint(&self) -> (usize, Option<usize>) {  in size_hint()
3303     fn next_back(&mut self) -> Option<u8> {  in next_back()
3310     fn len(&self) -> usize {  in len()
3329     it: FieldsWith<'a, fn(char) -> bool>,
3334     fn new(bytes: &'a [u8]) -> Fields<'a> {  in new()
3344     fn next(&mut self) -> Option<&'a [u8]> {  in next()
3357 /// of the predicate, i.e., `FnMut(char) -> bool`.
3365 impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
3366     fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {  in new()
3371 impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
3375     fn next(&mut self) -> Option<&'a [u8]> {  in next()
3406     /// The end position of the previous match of our splitter. The element
3408     /// beginning of the next match of the splitter.
3411     /// splitter is matched at the end of the haystack. At that point, we still
3417     fn new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's> {  in new()
3418         let finder = haystack.find_iter(splitter);  in new()
3427     fn next(&mut self) -> Option<&'h [u8]> {  in next()
3462     /// The end position of the previous match of our splitter. The element
3464     /// beginning of the next match of the splitter.
3467     /// splitter is matched at the end of the haystack. At that point, we still
3473     fn new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's> {  in new()
3474         let finder = haystack.rfind_iter(splitter);  in new()
3483     fn next(&mut self) -> Option<&'h [u8]> {  in next()
3526         splitter: &'s [u8],  in new()
3528     ) -> SplitN<'h, 's> {  in new()
3529         let split = haystack.split_str(splitter);  in new()
3538     fn next(&mut self) -> Option<&'h [u8]> {  in next()
3565         splitter: &'s [u8],  in new()
3567     ) -> SplitNReverse<'h, 's> {  in new()
3568         let split = haystack.rsplit_str(splitter);  in new()
3577     fn next(&mut self) -> Option<&'h [u8]> {  in next()
3601     fn new(bytes: &'a [u8]) -> Lines<'a> {  in new()
3623     pub fn as_bytes(&self) -> &'a [u8] {  in as_bytes()
3632     fn next(&mut self) -> Option<&'a [u8]> {  in next()
3639     fn next_back(&mut self) -> Option<Self::Item> {  in next_back()
3665     fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {  in new()
3687     pub fn as_bytes(&self) -> &'a [u8] {  in as_bytes()
3696     fn next(&mut self) -> Option<&'a [u8]> {  in next()
3715     fn next_back(&mut self) -> Option<Self::Item> {  in next_back()
3734 fn trim_last_terminator(mut s: &[u8]) -> &[u8] {  in trim_last_terminator()
3736         s = &s[..s.len() - 1];  in trim_last_terminator()
3738             s = &s[..s.len() - 1];  in trim_last_terminator()