• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use core::{iter, slice, str};
2 
3 #[cfg(all(feature = "alloc", feature = "unicode"))]
4 use alloc::vec;
5 #[cfg(feature = "alloc")]
6 use alloc::{borrow::Cow, string::String, vec::Vec};
7 
8 #[cfg(feature = "std")]
9 use std::{ffi::OsStr, path::Path};
10 
11 use memchr::{memchr, memmem, memrchr};
12 
13 #[cfg(feature = "alloc")]
14 use crate::ext_vec::ByteVec;
15 #[cfg(feature = "unicode")]
16 use crate::unicode::{
17     whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
18     SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
19     WordsWithBreaks,
20 };
21 use crate::{
22     ascii,
23     bstr::BStr,
24     byteset,
25     utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error},
26 };
27 
28 /// A short-hand constructor for building a `&[u8]`.
29 ///
30 /// This idiosyncratic constructor is useful for concisely building byte string
31 /// slices. Its primary utility is in conveniently writing byte string literals
32 /// in a uniform way. For example, consider this code that does not compile:
33 ///
34 /// ```ignore
35 /// let strs = vec![b"a", b"xy"];
36 /// ```
37 ///
38 /// The above code doesn't compile because the type of the byte string literal
39 /// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is
40 /// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored
41 /// in the same `Vec`. (This is dissimilar from normal Unicode string slices,
42 /// where both `"a"` and `"xy"` have the same type of `&'static str`.)
43 ///
44 /// One way of getting the above code to compile is to convert byte strings to
45 /// slices. You might try this:
46 ///
47 /// ```ignore
48 /// let strs = vec![&b"a", &b"xy"];
49 /// ```
50 ///
51 /// But this just creates values with type `& &'static [u8; 1]` and
52 /// `& &'static [u8; 2]`. Instead, you need to force the issue like so:
53 ///
54 /// ```
55 /// let strs = vec![&b"a"[..], &b"xy"[..]];
56 /// // or
57 /// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
58 /// ```
59 ///
60 /// But neither of these are particularly convenient to type, especially when
61 /// it's something as common as a string literal. Thus, this constructor
62 /// permits writing the following instead:
63 ///
64 /// ```
65 /// use bstr::B;
66 ///
67 /// let strs = vec![B("a"), B(b"xy")];
68 /// ```
69 ///
70 /// Notice that this also lets you mix and match both string literals and byte
71 /// string literals. This can be quite convenient!
72 #[allow(non_snake_case)]
73 #[inline]
pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] {
    // Delegates to `AsRef<[u8]>` so that `str`, `[u8]`, `[u8; N]` and any
    // other byte-viewable input all come back as the one type `&[u8]`, tied
    // to the lifetime of the borrowed argument.
    bytes.as_ref()
}
77 
78 impl ByteSlice for [u8] {
79     #[inline]
as_bytes(&self) -> &[u8]80     fn as_bytes(&self) -> &[u8] {
81         self
82     }
83 
84     #[inline]
as_bytes_mut(&mut self) -> &mut [u8]85     fn as_bytes_mut(&mut self) -> &mut [u8] {
86         self
87     }
88 }
89 
90 impl<const N: usize> ByteSlice for [u8; N] {
91     #[inline]
as_bytes(&self) -> &[u8]92     fn as_bytes(&self) -> &[u8] {
93         self
94     }
95 
96     #[inline]
as_bytes_mut(&mut self) -> &mut [u8]97     fn as_bytes_mut(&mut self) -> &mut [u8] {
98         self
99     }
100 }
101 
102 /// Ensure that callers cannot implement `ByteSlice` by making an
103 /// unimplementable trait its super trait.
mod private {
    // The trait is `pub` so it can appear as a supertrait bound, while the
    // enclosing module is private, which is what keeps it from being named
    // (and therefore implemented) from outside this crate.
    pub trait Sealed {}
}
107 impl private::Sealed for [u8] {}
108 impl<const N: usize> private::Sealed for [u8; N] {}
109 
110 /// A trait that extends `&[u8]` with string oriented methods.
111 ///
112 /// This trait is sealed and cannot be implemented outside of `bstr`.
113 pub trait ByteSlice: private::Sealed {
114     /// A method for accessing the raw bytes of this type. This is always a
115     /// no-op and callers shouldn't care about it. This only exists for making
116     /// the extension trait work.
117     #[doc(hidden)]
as_bytes(&self) -> &[u8]118     fn as_bytes(&self) -> &[u8];
119 
120     /// A method for accessing the raw bytes of this type, mutably. This is
121     /// always a no-op and callers shouldn't care about it. This only exists
122     /// for making the extension trait work.
123     #[doc(hidden)]
as_bytes_mut(&mut self) -> &mut [u8]124     fn as_bytes_mut(&mut self) -> &mut [u8];
125 
126     /// Return this byte slice as a `&BStr`.
127     ///
128     /// Using `&BStr` is useful because of its `fmt::Debug` representation
129     /// and various other trait implementations (such as `PartialEq` and
130     /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
131     /// shows its bytes as a normal string. For invalid UTF-8, hex escape
132     /// sequences are used.
133     ///
134     /// # Examples
135     ///
136     /// Basic usage:
137     ///
138     /// ```
139     /// use bstr::ByteSlice;
140     ///
141     /// println!("{:?}", b"foo\xFFbar".as_bstr());
142     /// ```
143     #[inline]
as_bstr(&self) -> &BStr144     fn as_bstr(&self) -> &BStr {
145         BStr::new(self.as_bytes())
146     }
147 
148     /// Return this byte slice as a `&mut BStr`.
149     ///
150     /// Using `&mut BStr` is useful because of its `fmt::Debug` representation
151     /// and various other trait implementations (such as `PartialEq` and
152     /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
153     /// shows its bytes as a normal string. For invalid UTF-8, hex escape
154     /// sequences are used.
155     ///
156     /// # Examples
157     ///
158     /// Basic usage:
159     ///
160     /// ```
161     /// use bstr::ByteSlice;
162     ///
163     /// let mut bytes = *b"foo\xFFbar";
164     /// println!("{:?}", &mut bytes.as_bstr_mut());
165     /// ```
166     #[inline]
as_bstr_mut(&mut self) -> &mut BStr167     fn as_bstr_mut(&mut self) -> &mut BStr {
168         BStr::new_mut(self.as_bytes_mut())
169     }
170 
171     /// Create an immutable byte string from an OS string slice.
172     ///
173     /// When the underlying bytes of OS strings are accessible, then this
174     /// always succeeds and is zero cost. Otherwise, this returns `None` if the
175     /// given OS string is not valid UTF-8. (For example, when the underlying
176     /// bytes are inaccessible on Windows, file paths are allowed to be a
177     /// sequence of arbitrary 16-bit integers. Not all such sequences can be
178     /// transcoded to valid UTF-8.)
179     ///
180     /// # Examples
181     ///
182     /// Basic usage:
183     ///
184     /// ```
185     /// use std::ffi::OsStr;
186     ///
187     /// use bstr::{B, ByteSlice};
188     ///
189     /// let os_str = OsStr::new("foo");
190     /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
191     /// assert_eq!(bs, B("foo"));
192     /// ```
193     #[cfg(feature = "std")]
194     #[inline]
fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
    // Exactly one `imp` is compiled in, chosen by the target platform.

    // Unix: view the OS string's bytes through the platform extension
    // trait. This branch always yields `Some`.
    #[cfg(unix)]
    #[inline]
    fn imp(os_str: &OsStr) -> Option<&[u8]> {
        use std::os::unix::ffi::OsStrExt;

        Some(os_str.as_bytes())
    }

    // Elsewhere: succeed only when the OS string is valid UTF-8, in which
    // case the bytes of the resulting `&str` are returned.
    #[cfg(not(unix))]
    #[inline]
    fn imp(os_str: &OsStr) -> Option<&[u8]> {
        os_str.to_str().map(|s| s.as_bytes())
    }

    imp(os_str)
}
212 
213     /// Create an immutable byte string from a file path.
214     ///
215     /// When the underlying bytes of paths are accessible, then this always
216     /// succeeds and is zero cost. Otherwise, this returns `None` if the given
217     /// path is not valid UTF-8. (For example, when the underlying bytes are
218     /// inaccessible on Windows, file paths are allowed to be a sequence of
219     /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
220     /// valid UTF-8.)
221     ///
222     /// # Examples
223     ///
224     /// Basic usage:
225     ///
226     /// ```
227     /// use std::path::Path;
228     ///
229     /// use bstr::{B, ByteSlice};
230     ///
231     /// let path = Path::new("foo");
232     /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
233     /// assert_eq!(bs, B("foo"));
234     /// ```
235     #[cfg(feature = "std")]
236     #[inline]
from_path(path: &Path) -> Option<&[u8]>237     fn from_path(path: &Path) -> Option<&[u8]> {
238         Self::from_os_str(path.as_os_str())
239     }
240 
241     /// Safely convert this byte string into a `&str` if it's valid UTF-8.
242     ///
243     /// If this byte string is not valid UTF-8, then an error is returned. The
244     /// error returned indicates the first invalid byte found and the length
245     /// of the error.
246     ///
247     /// In cases where a lossy conversion to `&str` is acceptable, then use one
248     /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
249     /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
250     /// methods.
251     ///
252     /// # Examples
253     ///
254     /// Basic usage:
255     ///
256     /// ```
257     /// # #[cfg(feature = "alloc")] {
258     /// use bstr::{B, ByteSlice, ByteVec};
259     ///
260     /// # fn example() -> Result<(), bstr::Utf8Error> {
261     /// let s = B("☃βツ").to_str()?;
262     /// assert_eq!("☃βツ", s);
263     ///
264     /// let mut bstring = <Vec<u8>>::from("☃βツ");
265     /// bstring.push(b'\xFF');
266     /// let err = bstring.to_str().unwrap_err();
267     /// assert_eq!(8, err.valid_up_to());
268     /// # Ok(()) }; example().unwrap()
269     /// # }
270     /// ```
271     #[inline]
to_str(&self) -> Result<&str, Utf8Error>272     fn to_str(&self) -> Result<&str, Utf8Error> {
273         utf8::validate(self.as_bytes()).map(|_| {
274             // SAFETY: This is safe because of the guarantees provided by
275             // utf8::validate.
276             unsafe { str::from_utf8_unchecked(self.as_bytes()) }
277         })
278     }
279 
280     /// Unsafely convert this byte string into a `&str`, without checking for
281     /// valid UTF-8.
282     ///
283     /// # Safety
284     ///
285     /// Callers *must* ensure that this byte string is valid UTF-8 before
286     /// calling this method. Converting a byte string into a `&str` that is
287     /// not valid UTF-8 is considered undefined behavior.
288     ///
289     /// This routine is useful in performance sensitive contexts where the
290     /// UTF-8 validity of the byte string is already known and it is
291     /// undesirable to pay the cost of an additional UTF-8 validation check
292     /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
293     ///
294     /// # Examples
295     ///
296     /// Basic usage:
297     ///
298     /// ```
299     /// use bstr::{B, ByteSlice};
300     ///
301     /// // SAFETY: This is safe because string literals are guaranteed to be
302     /// // valid UTF-8 by the Rust compiler.
303     /// let s = unsafe { B("☃βツ").to_str_unchecked() };
304     /// assert_eq!("☃βツ", s);
305     /// ```
306     #[inline]
to_str_unchecked(&self) -> &str307     unsafe fn to_str_unchecked(&self) -> &str {
308         str::from_utf8_unchecked(self.as_bytes())
309     }
310 
311     /// Convert this byte string to a valid UTF-8 string by replacing invalid
312     /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
313     ///
314     /// If the byte string is already valid UTF-8, then no copying or
315     /// allocation is performed and a borrowed string slice is returned. If
316     /// the byte string is not valid UTF-8, then an owned string buffer is
317     /// returned with invalid bytes replaced by the replacement codepoint.
318     ///
319     /// This method uses the "substitution of maximal subparts" (Unicode
320     /// Standard, Chapter 3, Section 9) strategy for inserting the replacement
321     /// codepoint. Specifically, a replacement codepoint is inserted whenever a
322     /// byte is found that cannot possibly lead to a valid code unit sequence.
323     /// If there were previous bytes that represented a prefix of a well-formed
324     /// code unit sequence, then all of those bytes are substituted with a
325     /// single replacement codepoint. The "substitution of maximal subparts"
326     /// strategy is the same strategy used by
327     /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
328     /// For a more precise description of the maximal subpart strategy, see
329     /// the Unicode Standard, Chapter 3, Section 9. See also
330     /// [Public Review Issue #121](https://www.unicode.org/review/pr-121.html).
331     ///
332     /// N.B. Rust's standard library also appears to use the same strategy,
333     /// but it does not appear to be an API guarantee.
334     ///
335     /// # Examples
336     ///
337     /// Basic usage:
338     ///
339     /// ```
340     /// use std::borrow::Cow;
341     ///
342     /// use bstr::ByteSlice;
343     ///
344     /// let mut bstring = <Vec<u8>>::from("☃βツ");
345     /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
346     ///
347     /// // Add a byte that makes the sequence invalid.
348     /// bstring.push(b'\xFF');
349     /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
350     /// ```
351     ///
352     /// This demonstrates the "maximal subpart" substitution logic.
353     ///
354     /// ```
355     /// use bstr::{B, ByteSlice};
356     ///
357     /// // \x61 is the ASCII codepoint for 'a'.
358     /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
359     /// // \xE1\x80 is a valid 2-byte code unit prefix.
360     /// // \xC2 is a valid 1-byte code unit prefix.
361     /// // \x62 is the ASCII codepoint for 'b'.
362     /// //
363     /// // In sum, each of the prefixes is replaced by a single replacement
364     /// // codepoint since none of the prefixes are properly completed. This
365     /// // is in contrast to other strategies that might insert a replacement
366     /// // codepoint for every single byte.
367     /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
368     /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
369     /// ```
370     #[cfg(feature = "alloc")]
371     #[inline]
to_str_lossy(&self) -> Cow<'_, str>372     fn to_str_lossy(&self) -> Cow<'_, str> {
373         match utf8::validate(self.as_bytes()) {
374             Ok(()) => {
375                 // SAFETY: This is safe because of the guarantees provided by
376                 // utf8::validate.
377                 unsafe {
378                     Cow::Borrowed(str::from_utf8_unchecked(self.as_bytes()))
379                 }
380             }
381             Err(err) => {
382                 let mut lossy = String::with_capacity(self.as_bytes().len());
383                 let (valid, after) =
384                     self.as_bytes().split_at(err.valid_up_to());
385                 // SAFETY: This is safe because utf8::validate guarantees
386                 // that all of `valid` is valid UTF-8.
387                 lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
388                 lossy.push_str("\u{FFFD}");
389                 if let Some(len) = err.error_len() {
390                     after[len..].to_str_lossy_into(&mut lossy);
391                 }
392                 Cow::Owned(lossy)
393             }
394         }
395     }
396 
397     /// Copy the contents of this byte string into the given owned string
398     /// buffer, while replacing invalid UTF-8 code unit sequences with the
399     /// Unicode replacement codepoint (`U+FFFD`).
400     ///
401     /// This method uses the same "substitution of maximal subparts" strategy
402     /// for inserting the replacement codepoint as the
403     /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
404     ///
405     /// This routine is useful for amortizing allocation. However, unlike
406     /// `to_str_lossy`, this routine will _always_ copy the contents of this
407     /// byte string into the destination buffer, even if this byte string is
408     /// valid UTF-8.
409     ///
410     /// # Examples
411     ///
412     /// Basic usage:
413     ///
414     /// ```
415     /// use std::borrow::Cow;
416     ///
417     /// use bstr::ByteSlice;
418     ///
419     /// let mut bstring = <Vec<u8>>::from("☃βツ");
420     /// // Add a byte that makes the sequence invalid.
421     /// bstring.push(b'\xFF');
422     ///
423     /// let mut dest = String::new();
424     /// bstring.to_str_lossy_into(&mut dest);
425     /// assert_eq!("☃βツ\u{FFFD}", dest);
426     /// ```
427     #[cfg(feature = "alloc")]
428     #[inline]
to_str_lossy_into(&self, dest: &mut String)429     fn to_str_lossy_into(&self, dest: &mut String) {
430         let mut bytes = self.as_bytes();
431         dest.reserve(bytes.len());
432         loop {
433             match utf8::validate(bytes) {
434                 Ok(()) => {
435                     // SAFETY: This is safe because utf8::validate guarantees
436                     // that all of `bytes` is valid UTF-8.
437                     dest.push_str(unsafe { str::from_utf8_unchecked(bytes) });
438                     break;
439                 }
440                 Err(err) => {
441                     let (valid, after) = bytes.split_at(err.valid_up_to());
442                     // SAFETY: This is safe because utf8::validate guarantees
443                     // that all of `valid` is valid UTF-8.
444                     dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
445                     dest.push_str("\u{FFFD}");
446                     match err.error_len() {
447                         None => break,
448                         Some(len) => bytes = &after[len..],
449                     }
450                 }
451             }
452         }
453     }
454 
455     /// Create an OS string slice from this byte string.
456     ///
457     /// When OS strings can be constructed from arbitrary byte sequences, this
458     /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
459     /// decoding error if this byte string is not valid UTF-8. (For example,
460     /// assuming the representation of `OsStr` is opaque on Windows, file paths
461     /// are allowed to be a sequence of arbitrary 16-bit integers. There is
462     /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
463     /// arbitrary sequence of 16-bit integers. If the representation of `OsStr`
464     /// is ever opened up, then this will convert any sequence of bytes to an
465     /// `OsStr` without cost.)
466     ///
467     /// # Examples
468     ///
469     /// Basic usage:
470     ///
471     /// ```
472     /// use bstr::{B, ByteSlice};
473     ///
474     /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
475     /// assert_eq!(os_str, "foo");
476     /// ```
477     #[cfg(feature = "std")]
478     #[inline]
to_os_str(&self) -> Result<&OsStr, Utf8Error>479     fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
480         #[cfg(unix)]
481         #[inline]
482         fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
483             use std::os::unix::ffi::OsStrExt;
484 
485             Ok(OsStr::from_bytes(bytes))
486         }
487 
488         #[cfg(not(unix))]
489         #[inline]
490         fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
491             bytes.to_str().map(OsStr::new)
492         }
493 
494         imp(self.as_bytes())
495     }
496 
497     /// Lossily create an OS string slice from this byte string.
498     ///
499     /// When OS strings can be constructed from arbitrary byte sequences, this
500     /// is zero cost and always returns a slice. Otherwise, this will perform a
501     /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
502     /// the Unicode replacement codepoint.
503     ///
504     /// Note that this can prevent the correct roundtripping of file paths when
505     /// the representation of `OsStr` is opaque.
506     ///
507     /// # Examples
508     ///
509     /// Basic usage:
510     ///
511     /// ```
512     /// use bstr::ByteSlice;
513     ///
514     /// let os_str = b"foo\xFFbar".to_os_str_lossy();
515     /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
516     /// ```
517     #[cfg(feature = "std")]
518     #[inline]
to_os_str_lossy(&self) -> Cow<'_, OsStr>519     fn to_os_str_lossy(&self) -> Cow<'_, OsStr> {
520         #[cfg(unix)]
521         #[inline]
522         fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
523             use std::os::unix::ffi::OsStrExt;
524 
525             Cow::Borrowed(OsStr::from_bytes(bytes))
526         }
527 
528         #[cfg(not(unix))]
529         #[inline]
530         fn imp(bytes: &[u8]) -> Cow<OsStr> {
531             use std::ffi::OsString;
532 
533             match bytes.to_str_lossy() {
534                 Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)),
535                 Cow::Owned(x) => Cow::Owned(OsString::from(x)),
536             }
537         }
538 
539         imp(self.as_bytes())
540     }
541 
542     /// Create a path slice from this byte string.
543     ///
544     /// When paths can be constructed from arbitrary byte sequences, this
545     /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
546     /// decoding error if this byte string is not valid UTF-8. (For example,
547     /// assuming the representation of `Path` is opaque on Windows, file paths
548     /// are allowed to be a sequence of arbitrary 16-bit integers. There is
549     /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
550     /// arbitrary sequence of 16-bit integers. If the representation of `Path`
551     /// is ever opened up, then this will convert any sequence of bytes to a
552     /// `Path` without cost.)
553     ///
554     /// # Examples
555     ///
556     /// Basic usage:
557     ///
558     /// ```
559     /// use bstr::ByteSlice;
560     ///
561     /// let path = b"foo".to_path().expect("should be valid UTF-8");
562     /// assert_eq!(path.as_os_str(), "foo");
563     /// ```
564     #[cfg(feature = "std")]
565     #[inline]
to_path(&self) -> Result<&Path, Utf8Error>566     fn to_path(&self) -> Result<&Path, Utf8Error> {
567         self.to_os_str().map(Path::new)
568     }
569 
570     /// Lossily create a path slice from this byte string.
571     ///
572     /// When paths can be constructed from arbitrary byte sequences, this is
573     /// zero cost and always returns a slice. Otherwise, this will perform a
574     /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
575     /// the Unicode replacement codepoint.
576     ///
577     /// Note that this can prevent the correct roundtripping of file paths when
578     /// the representation of `Path` is opaque.
579     ///
580     /// # Examples
581     ///
582     /// Basic usage:
583     ///
584     /// ```
585     /// use bstr::ByteSlice;
586     ///
587     /// let bs = b"foo\xFFbar";
588     /// let path = bs.to_path_lossy();
589     /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
590     /// ```
591     #[cfg(feature = "std")]
592     #[inline]
to_path_lossy(&self) -> Cow<'_, Path>593     fn to_path_lossy(&self) -> Cow<'_, Path> {
594         use std::path::PathBuf;
595 
596         match self.to_os_str_lossy() {
597             Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
598             Cow::Owned(x) => Cow::Owned(PathBuf::from(x)),
599         }
600     }
601 
602     /// Create a new byte string by repeating this byte string `n` times.
603     ///
604     /// # Panics
605     ///
606     /// This function panics if the capacity of the new byte string would
607     /// overflow.
608     ///
609     /// # Examples
610     ///
611     /// Basic usage:
612     ///
613     /// ```
614     /// use bstr::{B, ByteSlice};
615     ///
616     /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
617     /// assert_eq!(b"foo".repeatn(0), B(""));
618     /// ```
619     #[cfg(feature = "alloc")]
620     #[inline]
repeatn(&self, n: usize) -> Vec<u8>621     fn repeatn(&self, n: usize) -> Vec<u8> {
622         self.as_bytes().repeat(n)
623     }
624 
625     /// Returns true if and only if this byte string contains the given needle.
626     ///
627     /// # Examples
628     ///
629     /// Basic usage:
630     ///
631     /// ```
632     /// use bstr::ByteSlice;
633     ///
634     /// assert!(b"foo bar".contains_str("foo"));
635     /// assert!(b"foo bar".contains_str("bar"));
636     /// assert!(!b"foo".contains_str("foobar"));
637     /// ```
638     #[inline]
contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool639     fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
640         self.find(needle).is_some()
641     }
642 
643     /// Returns true if and only if this byte string has the given prefix.
644     ///
645     /// # Examples
646     ///
647     /// Basic usage:
648     ///
649     /// ```
650     /// use bstr::ByteSlice;
651     ///
652     /// assert!(b"foo bar".starts_with_str("foo"));
653     /// assert!(!b"foo bar".starts_with_str("bar"));
654     /// assert!(!b"foo".starts_with_str("foobar"));
655     /// ```
656     #[inline]
starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool657     fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
658         self.as_bytes().starts_with(prefix.as_ref())
659     }
660 
661     /// Returns true if and only if this byte string has the given suffix.
662     ///
663     /// # Examples
664     ///
665     /// Basic usage:
666     ///
667     /// ```
668     /// use bstr::ByteSlice;
669     ///
670     /// assert!(b"foo bar".ends_with_str("bar"));
671     /// assert!(!b"foo bar".ends_with_str("foo"));
672     /// assert!(!b"bar".ends_with_str("foobar"));
673     /// ```
674     #[inline]
ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool675     fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
676         self.as_bytes().ends_with(suffix.as_ref())
677     }
678 
679     /// Returns the index of the first occurrence of the given needle.
680     ///
681     /// The needle may be any type that can be cheaply converted into a
682     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
683     ///
684     /// Note that if you're searching for the same needle in many
685     /// different small haystacks, it may be faster to initialize a
686     /// [`Finder`](struct.Finder.html) once, and reuse it for each search.
687     ///
688     /// # Complexity
689     ///
690     /// This routine is guaranteed to have worst case linear time complexity
691     /// with respect to both the needle and the haystack. That is, this runs
692     /// in `O(needle.len() + haystack.len())` time.
693     ///
694     /// This routine is also guaranteed to have worst case constant space
695     /// complexity.
696     ///
697     /// # Examples
698     ///
699     /// Basic usage:
700     ///
701     /// ```
702     /// use bstr::ByteSlice;
703     ///
704     /// let s = b"foo bar baz";
705     /// assert_eq!(Some(0), s.find("foo"));
706     /// assert_eq!(Some(4), s.find("bar"));
707     /// assert_eq!(None, s.find("quux"));
708     /// ```
709     #[inline]
find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize>710     fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
711         Finder::new(needle.as_ref()).find(self.as_bytes())
712     }
713 
714     /// Returns the index of the last occurrence of the given needle.
715     ///
716     /// The needle may be any type that can be cheaply converted into a
717     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
718     ///
719     /// Note that if you're searching for the same needle in many
720     /// different small haystacks, it may be faster to initialize a
721     /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
722     /// each search.
723     ///
724     /// # Complexity
725     ///
726     /// This routine is guaranteed to have worst case linear time complexity
727     /// with respect to both the needle and the haystack. That is, this runs
728     /// in `O(needle.len() + haystack.len())` time.
729     ///
730     /// This routine is also guaranteed to have worst case constant space
731     /// complexity.
732     ///
733     /// # Examples
734     ///
735     /// Basic usage:
736     ///
737     /// ```
738     /// use bstr::ByteSlice;
739     ///
740     /// let s = b"foo bar baz";
741     /// assert_eq!(Some(0), s.rfind("foo"));
742     /// assert_eq!(Some(4), s.rfind("bar"));
743     /// assert_eq!(Some(8), s.rfind("ba"));
744     /// assert_eq!(None, s.rfind("quux"));
745     /// ```
746     #[inline]
rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize>747     fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
748         FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
749     }
750 
751     /// Returns an iterator of the non-overlapping occurrences of the given
752     /// needle. The iterator yields byte offset positions indicating the start
753     /// of each match.
754     ///
755     /// # Complexity
756     ///
757     /// This routine is guaranteed to have worst case linear time complexity
758     /// with respect to both the needle and the haystack. That is, this runs
759     /// in `O(needle.len() + haystack.len())` time.
760     ///
761     /// This routine is also guaranteed to have worst case constant space
762     /// complexity.
763     ///
764     /// # Examples
765     ///
766     /// Basic usage:
767     ///
768     /// ```
769     /// use bstr::ByteSlice;
770     ///
771     /// let s = b"foo bar foo foo quux foo";
772     /// let matches: Vec<usize> = s.find_iter("foo").collect();
773     /// assert_eq!(matches, vec![0, 8, 12, 21]);
774     /// ```
775     ///
776     /// An empty string matches at every position, including the position
777     /// immediately following the last byte:
778     ///
779     /// ```
780     /// use bstr::ByteSlice;
781     ///
782     /// let matches: Vec<usize> = b"foo".find_iter("").collect();
783     /// assert_eq!(matches, vec![0, 1, 2, 3]);
784     ///
785     /// let matches: Vec<usize> = b"".find_iter("").collect();
786     /// assert_eq!(matches, vec![0]);
787     /// ```
788     #[inline]
find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>( &'h self, needle: &'n B, ) -> Find<'h, 'n>789     fn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
790         &'h self,
791         needle: &'n B,
792     ) -> Find<'h, 'n> {
793         Find::new(self.as_bytes(), needle.as_ref())
794     }
795 
796     /// Returns an iterator of the non-overlapping occurrences of the given
797     /// needle in reverse. The iterator yields byte offset positions indicating
798     /// the start of each match.
799     ///
800     /// # Complexity
801     ///
802     /// This routine is guaranteed to have worst case linear time complexity
803     /// with respect to both the needle and the haystack. That is, this runs
804     /// in `O(needle.len() + haystack.len())` time.
805     ///
806     /// This routine is also guaranteed to have worst case constant space
807     /// complexity.
808     ///
809     /// # Examples
810     ///
811     /// Basic usage:
812     ///
813     /// ```
814     /// use bstr::ByteSlice;
815     ///
816     /// let s = b"foo bar foo foo quux foo";
817     /// let matches: Vec<usize> = s.rfind_iter("foo").collect();
818     /// assert_eq!(matches, vec![21, 12, 8, 0]);
819     /// ```
820     ///
821     /// An empty string matches at every position, including the position
822     /// immediately following the last byte:
823     ///
824     /// ```
825     /// use bstr::ByteSlice;
826     ///
827     /// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
828     /// assert_eq!(matches, vec![3, 2, 1, 0]);
829     ///
830     /// let matches: Vec<usize> = b"".rfind_iter("").collect();
831     /// assert_eq!(matches, vec![0]);
832     /// ```
833     #[inline]
rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>( &'h self, needle: &'n B, ) -> FindReverse<'h, 'n>834     fn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
835         &'h self,
836         needle: &'n B,
837     ) -> FindReverse<'h, 'n> {
838         FindReverse::new(self.as_bytes(), needle.as_ref())
839     }
840 
841     /// Returns the index of the first occurrence of the given byte. If the
842     /// byte does not occur in this byte string, then `None` is returned.
843     ///
844     /// # Examples
845     ///
846     /// Basic usage:
847     ///
848     /// ```
849     /// use bstr::ByteSlice;
850     ///
851     /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
852     /// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
853     /// ```
854     #[inline]
find_byte(&self, byte: u8) -> Option<usize>855     fn find_byte(&self, byte: u8) -> Option<usize> {
856         memchr(byte, self.as_bytes())
857     }
858 
859     /// Returns the index of the last occurrence of the given byte. If the
860     /// byte does not occur in this byte string, then `None` is returned.
861     ///
862     /// # Examples
863     ///
864     /// Basic usage:
865     ///
866     /// ```
867     /// use bstr::ByteSlice;
868     ///
869     /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
870     /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
871     /// ```
872     #[inline]
rfind_byte(&self, byte: u8) -> Option<usize>873     fn rfind_byte(&self, byte: u8) -> Option<usize> {
874         memrchr(byte, self.as_bytes())
875     }
876 
877     /// Returns the index of the first occurrence of the given codepoint.
878     /// If the codepoint does not occur in this byte string, then `None` is
879     /// returned.
880     ///
881     /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
882     /// then only explicit occurrences of that encoding will be found. Invalid
883     /// UTF-8 sequences will not be matched.
884     ///
885     /// # Examples
886     ///
887     /// Basic usage:
888     ///
889     /// ```
890     /// use bstr::{B, ByteSlice};
891     ///
892     /// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
893     /// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
894     /// assert_eq!(None, b"foo bar baz".find_char('y'));
895     /// ```
896     #[inline]
find_char(&self, ch: char) -> Option<usize>897     fn find_char(&self, ch: char) -> Option<usize> {
898         self.find(ch.encode_utf8(&mut [0; 4]))
899     }
900 
901     /// Returns the index of the last occurrence of the given codepoint.
902     /// If the codepoint does not occur in this byte string, then `None` is
903     /// returned.
904     ///
905     /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
906     /// then only explicit occurrences of that encoding will be found. Invalid
907     /// UTF-8 sequences will not be matched.
908     ///
909     /// # Examples
910     ///
911     /// Basic usage:
912     ///
913     /// ```
914     /// use bstr::{B, ByteSlice};
915     ///
916     /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
917     /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
918     /// assert_eq!(None, b"foo bar baz".rfind_char('y'));
919     /// ```
920     #[inline]
rfind_char(&self, ch: char) -> Option<usize>921     fn rfind_char(&self, ch: char) -> Option<usize> {
922         self.rfind(ch.encode_utf8(&mut [0; 4]))
923     }
924 
925     /// Returns the index of the first occurrence of any of the bytes in the
926     /// provided set.
927     ///
928     /// The `byteset` may be any type that can be cheaply converted into a
929     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
930     /// note that passing a `&str` which contains multibyte characters may not
931     /// behave as you expect: each byte in the `&str` is treated as an
932     /// individual member of the byte set.
933     ///
934     /// Note that order is irrelevant for the `byteset` parameter, and
935     /// duplicate bytes present in its body are ignored.
936     ///
937     /// # Complexity
938     ///
939     /// This routine is guaranteed to have worst case linear time complexity
940     /// with respect to both the set of bytes and the haystack. That is, this
941     /// runs in `O(byteset.len() + haystack.len())` time.
942     ///
943     /// This routine is also guaranteed to have worst case constant space
944     /// complexity.
945     ///
946     /// # Examples
947     ///
948     /// Basic usage:
949     ///
950     /// ```
951     /// use bstr::ByteSlice;
952     ///
953     /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
954     /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
955     /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
956     /// // The empty byteset never matches.
957     /// assert_eq!(None, b"abc".find_byteset(b""));
958     /// assert_eq!(None, b"".find_byteset(b""));
959     /// ```
960     #[inline]
find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>961     fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
962         byteset::find(self.as_bytes(), byteset.as_ref())
963     }
964 
965     /// Returns the index of the first occurrence of a byte that is not a
966     /// member of the provided set.
967     ///
968     /// The `byteset` may be any type that can be cheaply converted into a
969     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
970     /// note that passing a `&str` which contains multibyte characters may not
971     /// behave as you expect: each byte in the `&str` is treated as an
972     /// individual member of the byte set.
973     ///
974     /// Note that order is irrelevant for the `byteset` parameter, and
975     /// duplicate bytes present in its body are ignored.
976     ///
977     /// # Complexity
978     ///
979     /// This routine is guaranteed to have worst case linear time complexity
980     /// with respect to both the set of bytes and the haystack. That is, this
981     /// runs in `O(byteset.len() + haystack.len())` time.
982     ///
983     /// This routine is also guaranteed to have worst case constant space
984     /// complexity.
985     ///
986     /// # Examples
987     ///
988     /// Basic usage:
989     ///
990     /// ```
991     /// use bstr::ByteSlice;
992     ///
993     /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
994     /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
995     /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
996     /// // The negation of the empty byteset matches everything.
997     /// assert_eq!(Some(0), b"abc".find_not_byteset(b""));
998     /// // But an empty string never contains anything.
999     /// assert_eq!(None, b"".find_not_byteset(b""));
1000     /// ```
1001     #[inline]
find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>1002     fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1003         byteset::find_not(self.as_bytes(), byteset.as_ref())
1004     }
1005 
1006     /// Returns the index of the last occurrence of any of the bytes in the
1007     /// provided set.
1008     ///
1009     /// The `byteset` may be any type that can be cheaply converted into a
1010     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1011     /// note that passing a `&str` which contains multibyte characters may not
1012     /// behave as you expect: each byte in the `&str` is treated as an
1013     /// individual member of the byte set.
1014     ///
1015     /// Note that order is irrelevant for the `byteset` parameter, and duplicate
1016     /// bytes present in its body are ignored.
1017     ///
1018     /// # Complexity
1019     ///
1020     /// This routine is guaranteed to have worst case linear time complexity
1021     /// with respect to both the set of bytes and the haystack. That is, this
1022     /// runs in `O(byteset.len() + haystack.len())` time.
1023     ///
1024     /// This routine is also guaranteed to have worst case constant space
1025     /// complexity.
1026     ///
1027     /// # Examples
1028     ///
1029     /// Basic usage:
1030     ///
1031     /// ```
1032     /// use bstr::ByteSlice;
1033     ///
1034     /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
1035     /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
1036     /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
1037     /// ```
1038     #[inline]
rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>1039     fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1040         byteset::rfind(self.as_bytes(), byteset.as_ref())
1041     }
1042 
1043     /// Returns the index of the last occurrence of a byte that is not a member
1044     /// of the provided set.
1045     ///
1046     /// The `byteset` may be any type that can be cheaply converted into a
1047     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1048     /// note that passing a `&str` which contains multibyte characters may not
1049     /// behave as you expect: each byte in the `&str` is treated as an
1050     /// individual member of the byte set.
1051     ///
1052     /// Note that order is irrelevant for the `byteset` parameter, and
1053     /// duplicate bytes present in its body are ignored.
1054     ///
1055     /// # Complexity
1056     ///
1057     /// This routine is guaranteed to have worst case linear time complexity
1058     /// with respect to both the set of bytes and the haystack. That is, this
1059     /// runs in `O(byteset.len() + haystack.len())` time.
1060     ///
1061     /// This routine is also guaranteed to have worst case constant space
1062     /// complexity.
1063     ///
1064     /// # Examples
1065     ///
1066     /// Basic usage:
1067     ///
1068     /// ```
1069     /// use bstr::ByteSlice;
1070     ///
1071     /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
1072     /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
1073     /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
1074     /// ```
1075     #[inline]
rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize>1076     fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1077         byteset::rfind_not(self.as_bytes(), byteset.as_ref())
1078     }
1079 
1080     /// Returns an iterator over the fields in a byte string, separated
1081     /// by contiguous whitespace (according to the Unicode property
1082     /// `White_Space`).
1083     ///
1084     /// # Example
1085     ///
1086     /// Basic usage:
1087     ///
1088     /// ```
1089     /// use bstr::{B, ByteSlice};
1090     ///
1091     /// let s = B("  foo\tbar\t\u{2003}\nquux   \n");
1092     /// let fields: Vec<&[u8]> = s.fields().collect();
1093     /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1094     /// ```
1095     ///
1096     /// A byte string consisting of just whitespace yields no elements:
1097     ///
1098     /// ```
1099     /// use bstr::{B, ByteSlice};
1100     ///
1101     /// assert_eq!(0, B("  \n\t\u{2003}\n  \t").fields().count());
1102     /// ```
1103     #[cfg(feature = "unicode")]
1104     #[inline]
fields(&self) -> Fields<'_>1105     fn fields(&self) -> Fields<'_> {
1106         Fields::new(self.as_bytes())
1107     }
1108 
1109     /// Returns an iterator over the fields in a byte string, separated by
1110     /// contiguous codepoints satisfying the given predicate.
1111     ///
1112     /// If this byte string is not valid UTF-8, then the given closure will
1113     /// be called with a Unicode replacement codepoint when invalid UTF-8
1114     /// bytes are seen.
1115     ///
1116     /// # Example
1117     ///
1118     /// Basic usage:
1119     ///
1120     /// ```
1121     /// use bstr::{B, ByteSlice};
1122     ///
1123     /// let s = b"123foo999999bar1quux123456";
1124     /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
1125     /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1126     /// ```
1127     ///
1128     /// A byte string consisting of all codepoints satisfying the predicate
1129     /// yields no elements:
1130     ///
1131     /// ```
1132     /// use bstr::ByteSlice;
1133     ///
1134     /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
1135     /// ```
1136     #[inline]
fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F>1137     fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> {
1138         FieldsWith::new(self.as_bytes(), f)
1139     }
1140 
1141     /// Returns an iterator over substrings of this byte string, separated
1142     /// by the given byte string. Each element yielded is guaranteed not to
1143     /// include the splitter substring.
1144     ///
1145     /// The splitter may be any type that can be cheaply converted into a
1146     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1147     ///
1148     /// # Examples
1149     ///
1150     /// Basic usage:
1151     ///
1152     /// ```
1153     /// use bstr::{B, ByteSlice};
1154     ///
1155     /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
1156     /// assert_eq!(x, vec![
1157     ///     B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
1158     /// ]);
1159     ///
1160     /// let x: Vec<&[u8]> = b"".split_str("X").collect();
1161     /// assert_eq!(x, vec![b""]);
1162     ///
1163     /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
1164     /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
1165     ///
1166     /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
1167     /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
1168     /// ```
1169     ///
1170     /// If a string contains multiple contiguous separators, you will end up
1171     /// with empty strings yielded by the iterator:
1172     ///
1173     /// ```
1174     /// use bstr::{B, ByteSlice};
1175     ///
1176     /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
1177     /// assert_eq!(x, vec![
1178     ///     B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1179     /// ]);
1180     ///
1181     /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
1182     /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
1183     /// ```
1184     ///
1185     /// Separators at the start or end of a string are neighbored by empty
1186     /// strings.
1187     ///
1188     /// ```
1189     /// use bstr::{B, ByteSlice};
1190     ///
1191     /// let x: Vec<&[u8]> = b"010".split_str("0").collect();
1192     /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1193     /// ```
1194     ///
1195     /// When the empty string is used as a separator, it splits every **byte**
1196     /// in the byte string, along with the beginning and end of the byte
1197     /// string.
1198     ///
1199     /// ```
1200     /// use bstr::{B, ByteSlice};
1201     ///
1202     /// let x: Vec<&[u8]> = b"rust".split_str("").collect();
1203     /// assert_eq!(x, vec![
1204     ///     B(""), B("r"), B("u"), B("s"), B("t"), B(""),
1205     /// ]);
1206     ///
1207     /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1208     /// // may not be valid UTF-8!
1209     /// let x: Vec<&[u8]> = B("☃").split_str("").collect();
1210     /// assert_eq!(x, vec![
1211     ///     B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
1212     /// ]);
1213     /// ```
1214     ///
1215     /// Contiguous separators, especially whitespace, can lead to possibly
1216     /// surprising behavior. For example, this code is correct:
1217     ///
1218     /// ```
1219     /// use bstr::{B, ByteSlice};
1220     ///
1221     /// let x: Vec<&[u8]> = b"    a  b c".split_str(" ").collect();
1222     /// assert_eq!(x, vec![
1223     ///     B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1224     /// ]);
1225     /// ```
1226     ///
1227     /// It does *not* give you `["a", "b", "c"]`. For that behavior, use
1228     /// [`fields`](#method.fields) instead.
1229     #[inline]
split_str<'h, 's, B: ?Sized + AsRef<[u8]>>( &'h self, splitter: &'s B, ) -> Split<'h, 's>1230     fn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1231         &'h self,
1232         splitter: &'s B,
1233     ) -> Split<'h, 's> {
1234         Split::new(self.as_bytes(), splitter.as_ref())
1235     }
1236 
1237     /// Returns an iterator over substrings of this byte string, separated by
1238     /// the given byte string, in reverse. Each element yielded is guaranteed
1239     /// not to include the splitter substring.
1240     ///
1241     /// The splitter may be any type that can be cheaply converted into a
1242     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1243     ///
1244     /// # Examples
1245     ///
1246     /// Basic usage:
1247     ///
1248     /// ```
1249     /// use bstr::{B, ByteSlice};
1250     ///
1251     /// let x: Vec<&[u8]> =
1252     ///     b"Mary had a little lamb".rsplit_str(" ").collect();
1253     /// assert_eq!(x, vec![
1254     ///     B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
1255     /// ]);
1256     ///
1257     /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
1258     /// assert_eq!(x, vec![b""]);
1259     ///
1260     /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
1261     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
1262     ///
1263     /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
1264     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
1265     /// ```
1266     ///
1267     /// If a string contains multiple contiguous separators, you will end up
1268     /// with empty strings yielded by the iterator:
1269     ///
1270     /// ```
1271     /// use bstr::{B, ByteSlice};
1272     ///
1273     /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
1274     /// assert_eq!(x, vec![
1275     ///     B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1276     /// ]);
1277     ///
1278     /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
1279     /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
1280     /// ```
1281     ///
1282     /// Separators at the start or end of a string are neighbored by empty
1283     /// strings.
1284     ///
1285     /// ```
1286     /// use bstr::{B, ByteSlice};
1287     ///
1288     /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
1289     /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1290     /// ```
1291     ///
1292     /// When the empty string is used as a separator, it splits every **byte**
1293     /// in the byte string, along with the beginning and end of the byte
1294     /// string.
1295     ///
1296     /// ```
1297     /// use bstr::{B, ByteSlice};
1298     ///
1299     /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
1300     /// assert_eq!(x, vec![
1301     ///     B(""), B("t"), B("s"), B("u"), B("r"), B(""),
1302     /// ]);
1303     ///
1304     /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1305     /// // may not be valid UTF-8!
1306     /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
1307     /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
1308     /// ```
1309     ///
1310     /// Contiguous separators, especially whitespace, can lead to possibly
1311     /// surprising behavior. For example, this code is correct:
1312     ///
1313     /// ```
1314     /// use bstr::{B, ByteSlice};
1315     ///
1316     /// let x: Vec<&[u8]> = b"    a  b c".rsplit_str(" ").collect();
1317     /// assert_eq!(x, vec![
1318     ///     B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1319     /// ]);
1320     /// ```
1321     ///
1322     /// It does *not* give you `["a", "b", "c"]`.
1323     #[inline]
rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>( &'h self, splitter: &'s B, ) -> SplitReverse<'h, 's>1324     fn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1325         &'h self,
1326         splitter: &'s B,
1327     ) -> SplitReverse<'h, 's> {
1328         SplitReverse::new(self.as_bytes(), splitter.as_ref())
1329     }
1330 
1331     /// Split this byte string at the first occurrence of `splitter`.
1332     ///
1333     /// If the `splitter` is found in the byte string, returns a tuple
1334     /// containing the parts of the string before and after the first occurrence
1335     /// of `splitter` respectively. Otherwise, if there are no occurrences of
1336     /// `splitter` in the byte string, returns `None`.
1337     ///
1338     /// The splitter may be any type that can be cheaply converted into a
1339     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1340     ///
1341     /// If you need to split on the *last* instance of a delimiter instead, see
    /// the [`ByteSlice::rsplit_once_str`](#method.rsplit_once_str) method.
1343     ///
1344     /// # Examples
1345     ///
1346     /// Basic usage:
1347     ///
1348     /// ```
1349     /// use bstr::{B, ByteSlice};
1350     ///
1351     /// assert_eq!(
1352     ///     B("foo,bar").split_once_str(","),
1353     ///     Some((B("foo"), B("bar"))),
1354     /// );
1355     /// assert_eq!(
1356     ///     B("foo,bar,baz").split_once_str(","),
1357     ///     Some((B("foo"), B("bar,baz"))),
1358     /// );
1359     /// assert_eq!(B("foo").split_once_str(","), None);
1360     /// assert_eq!(B("foo,").split_once_str(b","), Some((B("foo"), B(""))));
1361     /// assert_eq!(B(",foo").split_once_str(b","), Some((B(""), B("foo"))));
1362     /// ```
1363     #[inline]
split_once_str<'a, B: ?Sized + AsRef<[u8]>>( &'a self, splitter: &B, ) -> Option<(&'a [u8], &'a [u8])>1364     fn split_once_str<'a, B: ?Sized + AsRef<[u8]>>(
1365         &'a self,
1366         splitter: &B,
1367     ) -> Option<(&'a [u8], &'a [u8])> {
1368         let bytes = self.as_bytes();
1369         let splitter = splitter.as_ref();
1370         let start = Finder::new(splitter).find(bytes)?;
1371         let end = start + splitter.len();
1372         Some((&bytes[..start], &bytes[end..]))
1373     }
1374 
1375     /// Split this byte string at the last occurrence of `splitter`.
1376     ///
1377     /// If the `splitter` is found in the byte string, returns a tuple
1378     /// containing the parts of the string before and after the last occurrence
1379     /// of `splitter`, respectively. Otherwise, if there are no occurrences of
1380     /// `splitter` in the byte string, returns `None`.
1381     ///
1382     /// The splitter may be any type that can be cheaply converted into a
1383     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1384     ///
1385     /// If you need to split on the *first* instance of a delimiter instead, see
1386     /// the [`ByteSlice::split_once_str`](#method.split_once_str) method.
1387     ///
1388     /// # Examples
1389     ///
1390     /// Basic usage:
1391     ///
1392     /// ```
1393     /// use bstr::{B, ByteSlice};
1394     ///
1395     /// assert_eq!(
1396     ///     B("foo,bar").rsplit_once_str(","),
1397     ///     Some((B("foo"), B("bar"))),
1398     /// );
1399     /// assert_eq!(
1400     ///     B("foo,bar,baz").rsplit_once_str(","),
1401     ///     Some((B("foo,bar"), B("baz"))),
1402     /// );
1403     /// assert_eq!(B("foo").rsplit_once_str(","), None);
1404     /// assert_eq!(B("foo,").rsplit_once_str(b","), Some((B("foo"), B(""))));
1405     /// assert_eq!(B(",foo").rsplit_once_str(b","), Some((B(""), B("foo"))));
1406     /// ```
1407     #[inline]
rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>( &'a self, splitter: &B, ) -> Option<(&'a [u8], &'a [u8])>1408     fn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>(
1409         &'a self,
1410         splitter: &B,
1411     ) -> Option<(&'a [u8], &'a [u8])> {
1412         let bytes = self.as_bytes();
1413         let splitter = splitter.as_ref();
1414         let start = FinderReverse::new(splitter).rfind(bytes)?;
1415         let end = start + splitter.len();
1416         Some((&bytes[..start], &bytes[end..]))
1417     }
1418 
1419     /// Returns an iterator of at most `limit` substrings of this byte string,
1420     /// separated by the given byte string. If `limit` substrings are yielded,
1421     /// then the last substring will contain the remainder of this byte string.
1422     ///
    /// The splitter may be any type that can be cheaply converted into a
1424     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1425     ///
1426     /// # Examples
1427     ///
1428     /// Basic usage:
1429     ///
1430     /// ```
1431     /// use bstr::{B, ByteSlice};
1432     ///
1433     /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
1434     /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
1435     ///
1436     /// let x: Vec<_> = b"".splitn_str(3, "X").collect();
1437     /// assert_eq!(x, vec![b""]);
1438     ///
1439     /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
1440     /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
1441     ///
1442     /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
1443     /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
1444     ///
1445     /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
1446     /// assert_eq!(x, vec![B("abcXdef")]);
1447     ///
1448     /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
1449     /// assert_eq!(x, vec![B("abcdef")]);
1450     ///
1451     /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
1452     /// assert!(x.is_empty());
1453     /// ```
1454     #[inline]
splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>( &'h self, limit: usize, splitter: &'s B, ) -> SplitN<'h, 's>1455     fn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1456         &'h self,
1457         limit: usize,
1458         splitter: &'s B,
1459     ) -> SplitN<'h, 's> {
1460         SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
1461     }
1462 
1463     /// Returns an iterator of at most `limit` substrings of this byte string,
1464     /// separated by the given byte string, in reverse. If `limit` substrings
1465     /// are yielded, then the last substring will contain the remainder of this
1466     /// byte string.
1467     ///
    /// The splitter may be any type that can be cheaply converted into a
1469     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1470     ///
1471     /// # Examples
1472     ///
1473     /// Basic usage:
1474     ///
1475     /// ```
1476     /// use bstr::{B, ByteSlice};
1477     ///
1478     /// let x: Vec<_> =
1479     ///     b"Mary had a little lamb".rsplitn_str(3, " ").collect();
1480     /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
1481     ///
1482     /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
1483     /// assert_eq!(x, vec![b""]);
1484     ///
1485     /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
1486     /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
1487     ///
1488     /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
1489     /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
1490     ///
1491     /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
1492     /// assert_eq!(x, vec![B("abcXdef")]);
1493     ///
1494     /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
1495     /// assert_eq!(x, vec![B("abcdef")]);
1496     ///
1497     /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
1498     /// assert!(x.is_empty());
1499     /// ```
1500     #[inline]
rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>( &'h self, limit: usize, splitter: &'s B, ) -> SplitNReverse<'h, 's>1501     fn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1502         &'h self,
1503         limit: usize,
1504         splitter: &'s B,
1505     ) -> SplitNReverse<'h, 's> {
1506         SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
1507     }
1508 
    /// Replace all matches of the given needle with the given replacement, and
    /// return the result as a new `Vec<u8>`.
1511     ///
1512     /// This routine is useful as a convenience. If you need to reuse an
1513     /// allocation, use [`replace_into`](#method.replace_into) instead.
1514     ///
1515     /// # Examples
1516     ///
1517     /// Basic usage:
1518     ///
1519     /// ```
1520     /// use bstr::ByteSlice;
1521     ///
1522     /// let s = b"this is old".replace("old", "new");
1523     /// assert_eq!(s, "this is new".as_bytes());
1524     /// ```
1525     ///
1526     /// When the pattern doesn't match:
1527     ///
1528     /// ```
1529     /// use bstr::ByteSlice;
1530     ///
1531     /// let s = b"this is old".replace("nada nada", "limonada");
1532     /// assert_eq!(s, "this is old".as_bytes());
1533     /// ```
1534     ///
1535     /// When the needle is an empty string:
1536     ///
1537     /// ```
1538     /// use bstr::ByteSlice;
1539     ///
1540     /// let s = b"foo".replace("", "Z");
1541     /// assert_eq!(s, "ZfZoZoZ".as_bytes());
1542     /// ```
1543     #[cfg(feature = "alloc")]
1544     #[inline]
replace<N: AsRef<[u8]>, R: AsRef<[u8]>>( &self, needle: N, replacement: R, ) -> Vec<u8>1545     fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1546         &self,
1547         needle: N,
1548         replacement: R,
1549     ) -> Vec<u8> {
1550         let mut dest = Vec::with_capacity(self.as_bytes().len());
1551         self.replace_into(needle, replacement, &mut dest);
1552         dest
1553     }
1554 
    /// Replace up to `limit` matches of the given needle with the given
    /// replacement, and return the result as a new `Vec<u8>`.
1557     ///
1558     /// This routine is useful as a convenience. If you need to reuse an
1559     /// allocation, use [`replacen_into`](#method.replacen_into) instead.
1560     ///
1561     /// # Examples
1562     ///
1563     /// Basic usage:
1564     ///
1565     /// ```
1566     /// use bstr::ByteSlice;
1567     ///
1568     /// let s = b"foofoo".replacen("o", "z", 2);
1569     /// assert_eq!(s, "fzzfoo".as_bytes());
1570     /// ```
1571     ///
1572     /// When the pattern doesn't match:
1573     ///
1574     /// ```
1575     /// use bstr::ByteSlice;
1576     ///
1577     /// let s = b"foofoo".replacen("a", "z", 2);
1578     /// assert_eq!(s, "foofoo".as_bytes());
1579     /// ```
1580     ///
1581     /// When the needle is an empty string:
1582     ///
1583     /// ```
1584     /// use bstr::ByteSlice;
1585     ///
1586     /// let s = b"foo".replacen("", "Z", 2);
1587     /// assert_eq!(s, "ZfZoo".as_bytes());
1588     /// ```
1589     #[cfg(feature = "alloc")]
1590     #[inline]
replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>( &self, needle: N, replacement: R, limit: usize, ) -> Vec<u8>1591     fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1592         &self,
1593         needle: N,
1594         replacement: R,
1595         limit: usize,
1596     ) -> Vec<u8> {
1597         let mut dest = Vec::with_capacity(self.as_bytes().len());
1598         self.replacen_into(needle, replacement, limit, &mut dest);
1599         dest
1600     }
1601 
1602     /// Replace all matches of the given needle with the given replacement,
1603     /// and write the result into the provided `Vec<u8>`.
1604     ///
1605     /// This does **not** clear `dest` before writing to it.
1606     ///
1607     /// This routine is useful for reusing allocation. For a more convenient
1608     /// API, use [`replace`](#method.replace) instead.
1609     ///
1610     /// # Examples
1611     ///
1612     /// Basic usage:
1613     ///
1614     /// ```
1615     /// use bstr::ByteSlice;
1616     ///
1617     /// let s = b"this is old";
1618     ///
1619     /// let mut dest = vec![];
1620     /// s.replace_into("old", "new", &mut dest);
1621     /// assert_eq!(dest, "this is new".as_bytes());
1622     /// ```
1623     ///
1624     /// When the pattern doesn't match:
1625     ///
1626     /// ```
1627     /// use bstr::ByteSlice;
1628     ///
1629     /// let s = b"this is old";
1630     ///
1631     /// let mut dest = vec![];
1632     /// s.replace_into("nada nada", "limonada", &mut dest);
1633     /// assert_eq!(dest, "this is old".as_bytes());
1634     /// ```
1635     ///
1636     /// When the needle is an empty string:
1637     ///
1638     /// ```
1639     /// use bstr::ByteSlice;
1640     ///
1641     /// let s = b"foo";
1642     ///
1643     /// let mut dest = vec![];
1644     /// s.replace_into("", "Z", &mut dest);
1645     /// assert_eq!(dest, "ZfZoZoZ".as_bytes());
1646     /// ```
1647     #[cfg(feature = "alloc")]
1648     #[inline]
replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>( &self, needle: N, replacement: R, dest: &mut Vec<u8>, )1649     fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1650         &self,
1651         needle: N,
1652         replacement: R,
1653         dest: &mut Vec<u8>,
1654     ) {
1655         let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
1656 
1657         let mut last = 0;
1658         for start in self.find_iter(needle) {
1659             dest.push_str(&self.as_bytes()[last..start]);
1660             dest.push_str(replacement);
1661             last = start + needle.len();
1662         }
1663         dest.push_str(&self.as_bytes()[last..]);
1664     }
1665 
1666     /// Replace up to `limit` matches of the given needle with the given
1667     /// replacement, and write the result into the provided `Vec<u8>`.
1668     ///
1669     /// This does **not** clear `dest` before writing to it.
1670     ///
1671     /// This routine is useful for reusing allocation. For a more convenient
1672     /// API, use [`replacen`](#method.replacen) instead.
1673     ///
1674     /// # Examples
1675     ///
1676     /// Basic usage:
1677     ///
1678     /// ```
1679     /// use bstr::ByteSlice;
1680     ///
1681     /// let s = b"foofoo";
1682     ///
1683     /// let mut dest = vec![];
1684     /// s.replacen_into("o", "z", 2, &mut dest);
1685     /// assert_eq!(dest, "fzzfoo".as_bytes());
1686     /// ```
1687     ///
1688     /// When the pattern doesn't match:
1689     ///
1690     /// ```
1691     /// use bstr::ByteSlice;
1692     ///
1693     /// let s = b"foofoo";
1694     ///
1695     /// let mut dest = vec![];
1696     /// s.replacen_into("a", "z", 2, &mut dest);
1697     /// assert_eq!(dest, "foofoo".as_bytes());
1698     /// ```
1699     ///
1700     /// When the needle is an empty string:
1701     ///
1702     /// ```
1703     /// use bstr::ByteSlice;
1704     ///
1705     /// let s = b"foo";
1706     ///
1707     /// let mut dest = vec![];
1708     /// s.replacen_into("", "Z", 2, &mut dest);
1709     /// assert_eq!(dest, "ZfZoo".as_bytes());
1710     /// ```
1711     #[cfg(feature = "alloc")]
1712     #[inline]
replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>( &self, needle: N, replacement: R, limit: usize, dest: &mut Vec<u8>, )1713     fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
1714         &self,
1715         needle: N,
1716         replacement: R,
1717         limit: usize,
1718         dest: &mut Vec<u8>,
1719     ) {
1720         let (needle, replacement) = (needle.as_ref(), replacement.as_ref());
1721 
1722         let mut last = 0;
1723         for start in self.find_iter(needle).take(limit) {
1724             dest.push_str(&self.as_bytes()[last..start]);
1725             dest.push_str(replacement);
1726             last = start + needle.len();
1727         }
1728         dest.push_str(&self.as_bytes()[last..]);
1729     }
1730 
1731     /// Returns an iterator over the bytes in this byte string.
1732     ///
1733     /// # Examples
1734     ///
1735     /// Basic usage:
1736     ///
1737     /// ```
1738     /// use bstr::ByteSlice;
1739     ///
1740     /// let bs = b"foobar";
1741     /// let bytes: Vec<u8> = bs.bytes().collect();
1742     /// assert_eq!(bytes, bs);
1743     /// ```
1744     #[inline]
bytes(&self) -> Bytes<'_>1745     fn bytes(&self) -> Bytes<'_> {
1746         Bytes { it: self.as_bytes().iter() }
1747     }
1748 
1749     /// Returns an iterator over the Unicode scalar values in this byte string.
1750     /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1751     /// is yielded instead.
1752     ///
1753     /// # Examples
1754     ///
1755     /// Basic usage:
1756     ///
1757     /// ```
1758     /// use bstr::ByteSlice;
1759     ///
1760     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1761     /// let chars: Vec<char> = bs.chars().collect();
1762     /// assert_eq!(vec!['☃', '\u{FFFD}', '\u{1D783}', '\u{FFFD}', 'a'], chars);
1763     /// ```
1764     ///
1765     /// Codepoints can also be iterated over in reverse:
1766     ///
1767     /// ```
1768     /// use bstr::ByteSlice;
1769     ///
1770     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1771     /// let chars: Vec<char> = bs.chars().rev().collect();
1772     /// assert_eq!(vec!['a', '\u{FFFD}', '\u{1D783}', '\u{FFFD}', '☃'], chars);
1773     /// ```
1774     #[inline]
chars(&self) -> Chars<'_>1775     fn chars(&self) -> Chars<'_> {
1776         Chars::new(self.as_bytes())
1777     }
1778 
1779     /// Returns an iterator over the Unicode scalar values in this byte string
1780     /// along with their starting and ending byte index positions. If invalid
1781     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1782     /// instead.
1783     ///
1784     /// Note that this is slightly different from the `CharIndices` iterator
1785     /// provided by the standard library. Aside from working on possibly
1786     /// invalid UTF-8, this iterator provides both the corresponding starting
1787     /// and ending byte indices of each codepoint yielded. The ending position
1788     /// is necessary to slice the original byte string when invalid UTF-8 bytes
1789     /// are converted into a Unicode replacement codepoint, since a single
1790     /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
1791     /// (inclusive).
1792     ///
1793     /// # Examples
1794     ///
1795     /// Basic usage:
1796     ///
1797     /// ```
1798     /// use bstr::ByteSlice;
1799     ///
1800     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1801     /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
1802     /// assert_eq!(chars, vec![
1803     ///     (0, 3, '☃'),
1804     ///     (3, 4, '\u{FFFD}'),
1805     ///     (4, 8, '\u{1D783}'),
1806     ///     (8, 10, '\u{FFFD}'),
1807     ///     (10, 11, 'a'),
1808     /// ]);
1809     /// ```
1810     ///
1811     /// Codepoints can also be iterated over in reverse:
1812     ///
1813     /// ```
1814     /// use bstr::ByteSlice;
1815     ///
1816     /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1817     /// let chars: Vec<(usize, usize, char)> = bs
1818     ///     .char_indices()
1819     ///     .rev()
1820     ///     .collect();
1821     /// assert_eq!(chars, vec![
1822     ///     (10, 11, 'a'),
1823     ///     (8, 10, '\u{FFFD}'),
1824     ///     (4, 8, '\u{1D783}'),
1825     ///     (3, 4, '\u{FFFD}'),
1826     ///     (0, 3, '☃'),
1827     /// ]);
1828     /// ```
1829     #[inline]
char_indices(&self) -> CharIndices<'_>1830     fn char_indices(&self) -> CharIndices<'_> {
1831         CharIndices::new(self.as_bytes())
1832     }
1833 
1834     /// Iterate over chunks of valid UTF-8.
1835     ///
1836     /// The iterator returned yields chunks of valid UTF-8 separated by invalid
1837     /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
1838     /// which are determined via the "substitution of maximal subparts"
1839     /// strategy described in the docs for the
1840     /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
1841     /// method.
1842     ///
1843     /// # Examples
1844     ///
1845     /// This example shows how to gather all valid and invalid chunks from a
1846     /// byte slice:
1847     ///
1848     /// ```
1849     /// use bstr::{ByteSlice, Utf8Chunk};
1850     ///
1851     /// let bytes = b"foo\xFD\xFEbar\xFF";
1852     ///
1853     /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
1854     /// for chunk in bytes.utf8_chunks() {
1855     ///     if !chunk.valid().is_empty() {
1856     ///         valid_chunks.push(chunk.valid());
1857     ///     }
1858     ///     if !chunk.invalid().is_empty() {
1859     ///         invalid_chunks.push(chunk.invalid());
1860     ///     }
1861     /// }
1862     ///
1863     /// assert_eq!(valid_chunks, vec!["foo", "bar"]);
1864     /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
1865     /// ```
1866     #[inline]
utf8_chunks(&self) -> Utf8Chunks<'_>1867     fn utf8_chunks(&self) -> Utf8Chunks<'_> {
1868         Utf8Chunks { bytes: self.as_bytes() }
1869     }
1870 
1871     /// Returns an iterator over the grapheme clusters in this byte string.
1872     /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1873     /// is yielded instead.
1874     ///
1875     /// # Examples
1876     ///
1877     /// This example shows how multiple codepoints can combine to form a
1878     /// single grapheme cluster:
1879     ///
1880     /// ```
1881     /// use bstr::ByteSlice;
1882     ///
1883     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1884     /// let graphemes: Vec<&str> = bs.graphemes().collect();
1885     /// assert_eq!(vec!["à̖", "\u{1F1FA}\u{1F1F8}"], graphemes);
1886     /// ```
1887     ///
1888     /// This shows that graphemes can be iterated over in reverse:
1889     ///
1890     /// ```
1891     /// use bstr::ByteSlice;
1892     ///
1893     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1894     /// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
1895     /// assert_eq!(vec!["\u{1F1FA}\u{1F1F8}", "à̖"], graphemes);
1896     /// ```
1897     #[cfg(feature = "unicode")]
1898     #[inline]
graphemes(&self) -> Graphemes<'_>1899     fn graphemes(&self) -> Graphemes<'_> {
1900         Graphemes::new(self.as_bytes())
1901     }
1902 
1903     /// Returns an iterator over the grapheme clusters in this byte string
1904     /// along with their starting and ending byte index positions. If invalid
1905     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1906     /// instead.
1907     ///
1908     /// # Examples
1909     ///
1910     /// This example shows how to get the byte offsets of each individual
1911     /// grapheme cluster:
1912     ///
1913     /// ```
1914     /// use bstr::ByteSlice;
1915     ///
1916     /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1917     /// let graphemes: Vec<(usize, usize, &str)> =
1918     ///     bs.grapheme_indices().collect();
1919     /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "\u{1F1FA}\u{1F1F8}")], graphemes);
1920     /// ```
1921     ///
1922     /// This example shows what happens when invalid UTF-8 is encountered. Note
1923     /// that the offsets are valid indices into the original string, and do
1924     /// not necessarily correspond to the length of the `&str` returned!
1925     ///
1926     /// ```
1927     /// # #[cfg(all(feature = "alloc"))] {
1928     /// use bstr::{ByteSlice, ByteVec};
1929     ///
1930     /// let mut bytes = vec![];
1931     /// bytes.push_str("a\u{0300}\u{0316}");
1932     /// bytes.push(b'\xFF');
1933     /// bytes.push_str("\u{1F1FA}\u{1F1F8}");
1934     ///
1935     /// let graphemes: Vec<(usize, usize, &str)> =
1936     ///     bytes.grapheme_indices().collect();
1937     /// assert_eq!(
1938     ///     graphemes,
1939     ///     vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "\u{1F1FA}\u{1F1F8}")]
1940     /// );
1941     /// # }
1942     /// ```
1943     #[cfg(feature = "unicode")]
1944     #[inline]
grapheme_indices(&self) -> GraphemeIndices<'_>1945     fn grapheme_indices(&self) -> GraphemeIndices<'_> {
1946         GraphemeIndices::new(self.as_bytes())
1947     }
1948 
1949     /// Returns an iterator over the words in this byte string. If invalid
1950     /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1951     /// instead.
1952     ///
1953     /// This is similar to
1954     /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
1955     /// except it only returns elements that contain a "word" character. A word
1956     /// character is defined by UTS #18 (Annex C) to be the combination of the
1957     /// `Alphabetic` and `Join_Control` properties, along with the
1958     /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1959     /// categories.
1960     ///
1961     /// Since words are made up of one or more codepoints, this iterator
1962     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1963     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1964     ///
1965     /// # Examples
1966     ///
1967     /// Basic usage:
1968     ///
1969     /// ```
1970     /// use bstr::ByteSlice;
1971     ///
1972     /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1973     /// let words: Vec<&str> = bs.words().collect();
1974     /// assert_eq!(words, vec![
1975     ///     "The", "quick", "brown", "fox", "can't",
1976     ///     "jump", "32.3", "feet", "right",
1977     /// ]);
1978     /// ```
1979     #[cfg(feature = "unicode")]
1980     #[inline]
words(&self) -> Words<'_>1981     fn words(&self) -> Words<'_> {
1982         Words::new(self.as_bytes())
1983     }
1984 
1985     /// Returns an iterator over the words in this byte string along with
1986     /// their starting and ending byte index positions.
1987     ///
1988     /// This is similar to
1989     /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
1990     /// except it only returns elements that contain a "word" character. A word
1991     /// character is defined by UTS #18 (Annex C) to be the combination of the
1992     /// `Alphabetic` and `Join_Control` properties, along with the
1993     /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1994     /// categories.
1995     ///
1996     /// Since words are made up of one or more codepoints, this iterator
1997     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1998     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1999     ///
2000     /// # Examples
2001     ///
2002     /// This example shows how to get the byte offsets of each individual
2003     /// word:
2004     ///
2005     /// ```
2006     /// use bstr::ByteSlice;
2007     ///
2008     /// let bs = b"can't jump 32.3 feet";
2009     /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
2010     /// assert_eq!(words, vec![
2011     ///     (0, 5, "can't"),
2012     ///     (6, 10, "jump"),
2013     ///     (11, 15, "32.3"),
2014     ///     (16, 20, "feet"),
2015     /// ]);
2016     /// ```
2017     #[cfg(feature = "unicode")]
2018     #[inline]
word_indices(&self) -> WordIndices<'_>2019     fn word_indices(&self) -> WordIndices<'_> {
2020         WordIndices::new(self.as_bytes())
2021     }
2022 
2023     /// Returns an iterator over the words in this byte string, along with
2024     /// all breaks between the words. Concatenating all elements yielded by
2025     /// the iterator results in the original string (modulo Unicode replacement
2026     /// codepoint substitutions if invalid UTF-8 is encountered).
2027     ///
2028     /// Since words are made up of one or more codepoints, this iterator
2029     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2030     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2031     ///
2032     /// # Examples
2033     ///
2034     /// Basic usage:
2035     ///
2036     /// ```
2037     /// use bstr::ByteSlice;
2038     ///
2039     /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
2040     /// let words: Vec<&str> = bs.words_with_breaks().collect();
2041     /// assert_eq!(words, vec![
2042     ///     "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
2043     ///     " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
2044     ///     ",", " ", "right", "?",
2045     /// ]);
2046     /// ```
2047     #[cfg(feature = "unicode")]
2048     #[inline]
words_with_breaks(&self) -> WordsWithBreaks<'_>2049     fn words_with_breaks(&self) -> WordsWithBreaks<'_> {
2050         WordsWithBreaks::new(self.as_bytes())
2051     }
2052 
2053     /// Returns an iterator over the words and their byte offsets in this
2054     /// byte string, along with all breaks between the words. Concatenating
2055     /// all elements yielded by the iterator results in the original string
2056     /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
2057     /// encountered).
2058     ///
2059     /// Since words are made up of one or more codepoints, this iterator
2060     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2061     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2062     ///
2063     /// # Examples
2064     ///
2065     /// This example shows how to get the byte offsets of each individual
2066     /// word:
2067     ///
2068     /// ```
2069     /// use bstr::ByteSlice;
2070     ///
2071     /// let bs = b"can't jump 32.3 feet";
2072     /// let words: Vec<(usize, usize, &str)> =
2073     ///     bs.words_with_break_indices().collect();
2074     /// assert_eq!(words, vec![
2075     ///     (0, 5, "can't"),
2076     ///     (5, 6, " "),
2077     ///     (6, 10, "jump"),
2078     ///     (10, 11, " "),
2079     ///     (11, 15, "32.3"),
2080     ///     (15, 16, " "),
2081     ///     (16, 20, "feet"),
2082     /// ]);
2083     /// ```
2084     #[cfg(feature = "unicode")]
2085     #[inline]
words_with_break_indices(&self) -> WordsWithBreakIndices<'_>2086     fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> {
2087         WordsWithBreakIndices::new(self.as_bytes())
2088     }
2089 
2090     /// Returns an iterator over the sentences in this byte string.
2091     ///
2092     /// Typically, a sentence will include its trailing punctuation and
2093     /// whitespace. Concatenating all elements yielded by the iterator
2094     /// results in the original string (modulo Unicode replacement codepoint
2095     /// substitutions if invalid UTF-8 is encountered).
2096     ///
2097     /// Since sentences are made up of one or more codepoints, this iterator
2098     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2099     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2100     ///
2101     /// # Examples
2102     ///
2103     /// Basic usage:
2104     ///
2105     /// ```
2106     /// use bstr::ByteSlice;
2107     ///
2108     /// let bs = b"I want this. Not that. Right now.";
2109     /// let sentences: Vec<&str> = bs.sentences().collect();
2110     /// assert_eq!(sentences, vec![
2111     ///     "I want this. ",
2112     ///     "Not that. ",
2113     ///     "Right now.",
2114     /// ]);
2115     /// ```
2116     #[cfg(feature = "unicode")]
2117     #[inline]
sentences(&self) -> Sentences<'_>2118     fn sentences(&self) -> Sentences<'_> {
2119         Sentences::new(self.as_bytes())
2120     }
2121 
2122     /// Returns an iterator over the sentences in this byte string along with
2123     /// their starting and ending byte index positions.
2124     ///
2125     /// Typically, a sentence will include its trailing punctuation and
2126     /// whitespace. Concatenating all elements yielded by the iterator
2127     /// results in the original string (modulo Unicode replacement codepoint
2128     /// substitutions if invalid UTF-8 is encountered).
2129     ///
2130     /// Since sentences are made up of one or more codepoints, this iterator
2131     /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2132     /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2133     ///
2134     /// # Examples
2135     ///
2136     /// Basic usage:
2137     ///
2138     /// ```
2139     /// use bstr::ByteSlice;
2140     ///
2141     /// let bs = b"I want this. Not that. Right now.";
2142     /// let sentences: Vec<(usize, usize, &str)> =
2143     ///     bs.sentence_indices().collect();
2144     /// assert_eq!(sentences, vec![
2145     ///     (0, 13, "I want this. "),
2146     ///     (13, 23, "Not that. "),
2147     ///     (23, 33, "Right now."),
2148     /// ]);
2149     /// ```
2150     #[cfg(feature = "unicode")]
2151     #[inline]
sentence_indices(&self) -> SentenceIndices<'_>2152     fn sentence_indices(&self) -> SentenceIndices<'_> {
2153         SentenceIndices::new(self.as_bytes())
2154     }
2155 
2156     /// An iterator over all lines in a byte string, without their
2157     /// terminators.
2158     ///
2159     /// For this iterator, the only line terminators recognized are `\r\n` and
2160     /// `\n`.
2161     ///
2162     /// # Examples
2163     ///
2164     /// Basic usage:
2165     ///
2166     /// ```
2167     /// use bstr::{B, ByteSlice};
2168     ///
2169     /// let s = b"\
2170     /// foo
2171     ///
2172     /// bar\r
2173     /// baz
2174     ///
2175     ///
2176     /// quux";
2177     /// let lines: Vec<&[u8]> = s.lines().collect();
2178     /// assert_eq!(lines, vec![
2179     ///     B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
2180     /// ]);
2181     /// ```
2182     #[inline]
lines(&self) -> Lines<'_>2183     fn lines(&self) -> Lines<'_> {
2184         Lines::new(self.as_bytes())
2185     }
2186 
2187     /// An iterator over all lines in a byte string, including their
2188     /// terminators.
2189     ///
2190     /// For this iterator, the only line terminator recognized is `\n`. (Since
2191     /// line terminators are included, this also handles `\r\n` line endings.)
2192     ///
2193     /// Line terminators are only included if they are present in the original
2194     /// byte string. For example, the last line in a byte string may not end
2195     /// with a line terminator.
2196     ///
2197     /// Concatenating all elements yielded by this iterator is guaranteed to
2198     /// yield the original byte string.
2199     ///
2200     /// # Examples
2201     ///
2202     /// Basic usage:
2203     ///
2204     /// ```
2205     /// use bstr::{B, ByteSlice};
2206     ///
2207     /// let s = b"\
2208     /// foo
2209     ///
2210     /// bar\r
2211     /// baz
2212     ///
2213     ///
2214     /// quux";
2215     /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
2216     /// assert_eq!(lines, vec![
2217     ///     B("foo\n"),
2218     ///     B("\n"),
2219     ///     B("bar\r\n"),
2220     ///     B("baz\n"),
2221     ///     B("\n"),
2222     ///     B("\n"),
2223     ///     B("quux"),
2224     /// ]);
2225     /// ```
2226     #[inline]
lines_with_terminator(&self) -> LinesWithTerminator<'_>2227     fn lines_with_terminator(&self) -> LinesWithTerminator<'_> {
2228         LinesWithTerminator::new(self.as_bytes())
2229     }
2230 
2231     /// Return a byte string slice with leading and trailing whitespace
2232     /// removed.
2233     ///
2234     /// Whitespace is defined according to the terms of the `White_Space`
2235     /// Unicode property.
2236     ///
2237     /// # Examples
2238     ///
2239     /// Basic usage:
2240     ///
2241     /// ```
2242     /// use bstr::{B, ByteSlice};
2243     ///
2244     /// let s = B(" foo\tbar\t\u{2003}\n");
2245     /// assert_eq!(s.trim(), B("foo\tbar"));
2246     /// ```
2247     #[cfg(feature = "unicode")]
2248     #[inline]
trim(&self) -> &[u8]2249     fn trim(&self) -> &[u8] {
2250         self.trim_start().trim_end()
2251     }
2252 
2253     /// Return a byte string slice with leading whitespace removed.
2254     ///
2255     /// Whitespace is defined according to the terms of the `White_Space`
2256     /// Unicode property.
2257     ///
2258     /// # Examples
2259     ///
2260     /// Basic usage:
2261     ///
2262     /// ```
2263     /// use bstr::{B, ByteSlice};
2264     ///
2265     /// let s = B(" foo\tbar\t\u{2003}\n");
2266     /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
2267     /// ```
2268     #[cfg(feature = "unicode")]
2269     #[inline]
trim_start(&self) -> &[u8]2270     fn trim_start(&self) -> &[u8] {
2271         let start = whitespace_len_fwd(self.as_bytes());
2272         &self.as_bytes()[start..]
2273     }
2274 
2275     /// Return a byte string slice with trailing whitespace removed.
2276     ///
2277     /// Whitespace is defined according to the terms of the `White_Space`
2278     /// Unicode property.
2279     ///
2280     /// # Examples
2281     ///
2282     /// Basic usage:
2283     ///
2284     /// ```
2285     /// use bstr::{B, ByteSlice};
2286     ///
2287     /// let s = B(" foo\tbar\t\u{2003}\n");
2288     /// assert_eq!(s.trim_end(), B(" foo\tbar"));
2289     /// ```
2290     #[cfg(feature = "unicode")]
2291     #[inline]
trim_end(&self) -> &[u8]2292     fn trim_end(&self) -> &[u8] {
2293         let end = whitespace_len_rev(self.as_bytes());
2294         &self.as_bytes()[..end]
2295     }
2296 
2297     /// Return a byte string slice with leading and trailing characters
2298     /// satisfying the given predicate removed.
2299     ///
2300     /// # Examples
2301     ///
2302     /// Basic usage:
2303     ///
2304     /// ```
2305     /// use bstr::{B, ByteSlice};
2306     ///
2307     /// let s = b"123foo5bar789";
2308     /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
2309     /// ```
2310     #[inline]
trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8]2311     fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2312         self.trim_start_with(&mut trim).trim_end_with(&mut trim)
2313     }
2314 
2315     /// Return a byte string slice with leading characters satisfying the given
2316     /// predicate removed.
2317     ///
2318     /// # Examples
2319     ///
2320     /// Basic usage:
2321     ///
2322     /// ```
2323     /// use bstr::{B, ByteSlice};
2324     ///
2325     /// let s = b"123foo5bar789";
2326     /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
2327     /// ```
2328     #[inline]
trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8]2329     fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2330         for (s, _, ch) in self.char_indices() {
2331             if !trim(ch) {
2332                 return &self.as_bytes()[s..];
2333             }
2334         }
2335         b""
2336     }
2337 
2338     /// Return a byte string slice with trailing characters satisfying the
2339     /// given predicate removed.
2340     ///
2341     /// # Examples
2342     ///
2343     /// Basic usage:
2344     ///
2345     /// ```
2346     /// use bstr::{B, ByteSlice};
2347     ///
2348     /// let s = b"123foo5bar789";
2349     /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
2350     /// ```
2351     #[inline]
trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8]2352     fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2353         for (_, e, ch) in self.char_indices().rev() {
2354             if !trim(ch) {
2355                 return &self.as_bytes()[..e];
2356             }
2357         }
2358         b""
2359     }
2360 
2361     /// Returns a new `Vec<u8>` containing the lowercase equivalent of this
2362     /// byte string.
2363     ///
2364     /// In this case, lowercase is defined according to the `Lowercase` Unicode
2365     /// property.
2366     ///
2367     /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2368     /// then it is copied to the returned `Vec<u8>` unchanged.
2369     ///
2370     /// Note that some characters in this byte string may expand into multiple
2371     /// characters when changing the case, so the number of bytes in the
2372     /// returned `Vec<u8>` may not be equivalent to the number of bytes in
2373     /// this byte string.
2374     ///
2375     /// If you'd like to reuse an allocation for performance reasons, then use
2376     /// [`to_lowercase_into`](#method.to_lowercase_into) instead.
2377     ///
2378     /// # Examples
2379     ///
2380     /// Basic usage:
2381     ///
2382     /// ```
2383     /// use bstr::{B, ByteSlice};
2384     ///
2385     /// let s = B("HELLO Β");
2386     /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
2387     /// ```
2388     ///
2389     /// Scripts without case are not changed:
2390     ///
2391     /// ```
2392     /// use bstr::{B, ByteSlice};
2393     ///
2394     /// let s = B("农历新年");
2395     /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
2396     /// ```
2397     ///
2398     /// Invalid UTF-8 remains as is:
2399     ///
2400     /// ```
2401     /// use bstr::{B, ByteSlice};
2402     ///
2403     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2404     /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
2405     /// ```
2406     #[cfg(all(feature = "alloc", feature = "unicode"))]
2407     #[inline]
to_lowercase(&self) -> Vec<u8>2408     fn to_lowercase(&self) -> Vec<u8> {
2409         let mut buf = vec![];
2410         self.to_lowercase_into(&mut buf);
2411         buf
2412     }
2413 
2414     /// Writes the lowercase equivalent of this byte string into the given
2415     /// buffer. The buffer is not cleared before written to.
2416     ///
2417     /// In this case, lowercase is defined according to the `Lowercase`
2418     /// Unicode property.
2419     ///
2420     /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2421     /// then it is written to the given buffer unchanged.
2422     ///
2423     /// Note that some characters in this byte string may expand into multiple
2424     /// characters when changing the case, so the number of bytes written to
2425     /// the given byte string may not be equivalent to the number of bytes in
2426     /// this byte string.
2427     ///
2428     /// If you don't need to amortize allocation and instead prefer
2429     /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
2430     ///
2431     /// # Examples
2432     ///
2433     /// Basic usage:
2434     ///
2435     /// ```
2436     /// use bstr::{B, ByteSlice};
2437     ///
2438     /// let s = B("HELLO Β");
2439     ///
2440     /// let mut buf = vec![];
2441     /// s.to_lowercase_into(&mut buf);
2442     /// assert_eq!("hello β".as_bytes(), buf.as_bytes());
2443     /// ```
2444     ///
2445     /// Scripts without case are not changed:
2446     ///
2447     /// ```
2448     /// use bstr::{B, ByteSlice};
2449     ///
2450     /// let s = B("农历新年");
2451     ///
2452     /// let mut buf = vec![];
2453     /// s.to_lowercase_into(&mut buf);
2454     /// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
2455     /// ```
2456     ///
2457     /// Invalid UTF-8 remains as is:
2458     ///
2459     /// ```
2460     /// use bstr::{B, ByteSlice};
2461     ///
2462     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2463     ///
2464     /// let mut buf = vec![];
2465     /// s.to_lowercase_into(&mut buf);
2466     /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
2467     /// ```
2468     #[cfg(all(feature = "alloc", feature = "unicode"))]
2469     #[inline]
to_lowercase_into(&self, buf: &mut Vec<u8>)2470     fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
2471         // TODO: This is the best we can do given what std exposes I think.
2472         // If we roll our own case handling, then we might be able to do this
2473         // a bit faster. We shouldn't roll our own case handling unless we
2474         // need to, e.g., for doing caseless matching or case folding.
2475 
2476         // TODO(BUG): This doesn't handle any special casing rules.
2477 
2478         buf.reserve(self.as_bytes().len());
2479         for (s, e, ch) in self.char_indices() {
2480             if ch == '\u{FFFD}' {
2481                 buf.push_str(&self.as_bytes()[s..e]);
2482             } else if ch.is_ascii() {
2483                 buf.push_char(ch.to_ascii_lowercase());
2484             } else {
2485                 for upper in ch.to_lowercase() {
2486                     buf.push_char(upper);
2487                 }
2488             }
2489         }
2490     }
2491 
2492     /// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of
2493     /// this byte string.
2494     ///
2495     /// In this case, lowercase is only defined in ASCII letters. Namely, the
2496     /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2497     /// In particular, the length of the byte string returned is always
2498     /// equivalent to the length of this byte string.
2499     ///
2500     /// If you'd like to reuse an allocation for performance reasons, then use
2501     /// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
2502     /// the conversion in place.
2503     ///
2504     /// # Examples
2505     ///
2506     /// Basic usage:
2507     ///
2508     /// ```
2509     /// use bstr::{B, ByteSlice};
2510     ///
2511     /// let s = B("HELLO Β");
2512     /// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
2513     /// ```
2514     ///
2515     /// Invalid UTF-8 remains as is:
2516     ///
2517     /// ```
2518     /// use bstr::{B, ByteSlice};
2519     ///
2520     /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2521     /// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
2522     /// ```
2523     #[cfg(feature = "alloc")]
2524     #[inline]
to_ascii_lowercase(&self) -> Vec<u8>2525     fn to_ascii_lowercase(&self) -> Vec<u8> {
2526         self.as_bytes().to_ascii_lowercase()
2527     }
2528 
2529     /// Convert this byte string to its lowercase ASCII equivalent in place.
2530     ///
2531     /// In this case, lowercase is only defined in ASCII letters. Namely, the
2532     /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2533     ///
2534     /// If you don't need to do the conversion in
2535     /// place and instead prefer convenience, then use
2536     /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
2537     ///
2538     /// # Examples
2539     ///
2540     /// Basic usage:
2541     ///
2542     /// ```
2543     /// use bstr::ByteSlice;
2544     ///
2545     /// let mut s = <Vec<u8>>::from("HELLO Β");
2546     /// s.make_ascii_lowercase();
2547     /// assert_eq!(s, "hello Β".as_bytes());
2548     /// ```
2549     ///
2550     /// Invalid UTF-8 remains as is:
2551     ///
2552     /// ```
2553     /// # #[cfg(feature = "alloc")] {
2554     /// use bstr::{B, ByteSlice, ByteVec};
2555     ///
2556     /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
2557     /// s.make_ascii_lowercase();
2558     /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
2559     /// # }
2560     /// ```
2561     #[inline]
make_ascii_lowercase(&mut self)2562     fn make_ascii_lowercase(&mut self) {
2563         self.as_bytes_mut().make_ascii_lowercase();
2564     }
2565 
2566     /// Returns a new `Vec<u8>` containing the uppercase equivalent of this
2567     /// byte string.
2568     ///
2569     /// In this case, uppercase is defined according to the `Uppercase`
2570     /// Unicode property.
2571     ///
    /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
    /// then it is copied into the returned `Vec<u8>` unchanged.
    ///
    /// Note that some characters in this byte string may expand into multiple
    /// characters when changing the case, so the number of bytes in the
    /// returned `Vec<u8>` may not be equivalent to the number of bytes in
    /// this byte string.
2579     ///
2580     /// If you'd like to reuse an allocation for performance reasons, then use
2581     /// [`to_uppercase_into`](#method.to_uppercase_into) instead.
2582     ///
2583     /// # Examples
2584     ///
2585     /// Basic usage:
2586     ///
2587     /// ```
2588     /// use bstr::{B, ByteSlice};
2589     ///
2590     /// let s = B("hello β");
2591     /// assert_eq!(s.to_uppercase(), B("HELLO Β"));
2592     /// ```
2593     ///
2594     /// Scripts without case are not changed:
2595     ///
2596     /// ```
2597     /// use bstr::{B, ByteSlice};
2598     ///
2599     /// let s = B("农历新年");
2600     /// assert_eq!(s.to_uppercase(), B("农历新年"));
2601     /// ```
2602     ///
2603     /// Invalid UTF-8 remains as is:
2604     ///
2605     /// ```
2606     /// use bstr::{B, ByteSlice};
2607     ///
2608     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2609     /// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2610     /// ```
2611     #[cfg(all(feature = "alloc", feature = "unicode"))]
2612     #[inline]
to_uppercase(&self) -> Vec<u8>2613     fn to_uppercase(&self) -> Vec<u8> {
2614         let mut buf = vec![];
2615         self.to_uppercase_into(&mut buf);
2616         buf
2617     }
2618 
2619     /// Writes the uppercase equivalent of this byte string into the given
2620     /// buffer. The buffer is not cleared before written to.
2621     ///
2622     /// In this case, uppercase is defined according to the `Uppercase`
2623     /// Unicode property.
2624     ///
2625     /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2626     /// then it is written to the given buffer unchanged.
2627     ///
2628     /// Note that some characters in this byte string may expand into multiple
2629     /// characters when changing the case, so the number of bytes written to
2630     /// the given byte string may not be equivalent to the number of bytes in
2631     /// this byte string.
2632     ///
2633     /// If you don't need to amortize allocation and instead prefer
2634     /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead.
2635     ///
2636     /// # Examples
2637     ///
2638     /// Basic usage:
2639     ///
2640     /// ```
2641     /// use bstr::{B, ByteSlice};
2642     ///
2643     /// let s = B("hello β");
2644     ///
2645     /// let mut buf = vec![];
2646     /// s.to_uppercase_into(&mut buf);
2647     /// assert_eq!(buf, B("HELLO Β"));
2648     /// ```
2649     ///
2650     /// Scripts without case are not changed:
2651     ///
2652     /// ```
2653     /// use bstr::{B, ByteSlice};
2654     ///
2655     /// let s = B("农历新年");
2656     ///
2657     /// let mut buf = vec![];
2658     /// s.to_uppercase_into(&mut buf);
2659     /// assert_eq!(buf, B("农历新年"));
2660     /// ```
2661     ///
2662     /// Invalid UTF-8 remains as is:
2663     ///
2664     /// ```
2665     /// use bstr::{B, ByteSlice};
2666     ///
2667     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2668     ///
2669     /// let mut buf = vec![];
2670     /// s.to_uppercase_into(&mut buf);
2671     /// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2672     /// ```
2673     #[cfg(all(feature = "alloc", feature = "unicode"))]
2674     #[inline]
to_uppercase_into(&self, buf: &mut Vec<u8>)2675     fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
2676         // TODO: This is the best we can do given what std exposes I think.
2677         // If we roll our own case handling, then we might be able to do this
2678         // a bit faster. We shouldn't roll our own case handling unless we
2679         // need to, e.g., for doing caseless matching or case folding.
2680         buf.reserve(self.as_bytes().len());
2681         for (s, e, ch) in self.char_indices() {
2682             if ch == '\u{FFFD}' {
2683                 buf.push_str(&self.as_bytes()[s..e]);
2684             } else if ch.is_ascii() {
2685                 buf.push_char(ch.to_ascii_uppercase());
2686             } else {
2687                 for upper in ch.to_uppercase() {
2688                     buf.push_char(upper);
2689                 }
2690             }
2691         }
2692     }
2693 
2694     /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of
2695     /// this byte string.
2696     ///
2697     /// In this case, uppercase is only defined in ASCII letters. Namely, the
2698     /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2699     /// In particular, the length of the byte string returned is always
2700     /// equivalent to the length of this byte string.
2701     ///
2702     /// If you'd like to reuse an allocation for performance reasons, then use
2703     /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform
2704     /// the conversion in place.
2705     ///
2706     /// # Examples
2707     ///
2708     /// Basic usage:
2709     ///
2710     /// ```
2711     /// use bstr::{B, ByteSlice};
2712     ///
2713     /// let s = B("hello β");
2714     /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
2715     /// ```
2716     ///
2717     /// Invalid UTF-8 remains as is:
2718     ///
2719     /// ```
2720     /// use bstr::{B, ByteSlice};
2721     ///
2722     /// let s = B(b"foo\xFFbar\xE2\x98baz");
2723     /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2724     /// ```
2725     #[cfg(feature = "alloc")]
2726     #[inline]
to_ascii_uppercase(&self) -> Vec<u8>2727     fn to_ascii_uppercase(&self) -> Vec<u8> {
2728         self.as_bytes().to_ascii_uppercase()
2729     }
2730 
2731     /// Convert this byte string to its uppercase ASCII equivalent in place.
2732     ///
2733     /// In this case, uppercase is only defined in ASCII letters. Namely, the
2734     /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2735     ///
2736     /// If you don't need to do the conversion in
2737     /// place and instead prefer convenience, then use
2738     /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
2739     ///
2740     /// # Examples
2741     ///
2742     /// Basic usage:
2743     ///
2744     /// ```
2745     /// use bstr::{B, ByteSlice};
2746     ///
2747     /// let mut s = <Vec<u8>>::from("hello β");
2748     /// s.make_ascii_uppercase();
2749     /// assert_eq!(s, B("HELLO β"));
2750     /// ```
2751     ///
2752     /// Invalid UTF-8 remains as is:
2753     ///
2754     /// ```
2755     /// # #[cfg(feature = "alloc")] {
2756     /// use bstr::{B, ByteSlice, ByteVec};
2757     ///
2758     /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
2759     /// s.make_ascii_uppercase();
2760     /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2761     /// # }
2762     /// ```
2763     #[inline]
make_ascii_uppercase(&mut self)2764     fn make_ascii_uppercase(&mut self) {
2765         self.as_bytes_mut().make_ascii_uppercase();
2766     }
2767 
2768     /// Reverse the bytes in this string, in place.
2769     ///
2770     /// This is not necessarily a well formed operation! For example, if this
2771     /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
2772     /// string will likely result in invalid UTF-8 and otherwise non-sensical
2773     /// content.
2774     ///
2775     /// Note that this is equivalent to the generic `[u8]::reverse` method.
2776     /// This method is provided to permit callers to explicitly differentiate
2777     /// between reversing bytes, codepoints and graphemes.
2778     ///
2779     /// # Examples
2780     ///
2781     /// Basic usage:
2782     ///
2783     /// ```
2784     /// use bstr::ByteSlice;
2785     ///
2786     /// let mut s = <Vec<u8>>::from("hello");
2787     /// s.reverse_bytes();
2788     /// assert_eq!(s, "olleh".as_bytes());
2789     /// ```
2790     #[inline]
reverse_bytes(&mut self)2791     fn reverse_bytes(&mut self) {
2792         self.as_bytes_mut().reverse();
2793     }
2794 
2795     /// Reverse the codepoints in this string, in place.
2796     ///
2797     /// If this byte string is valid UTF-8, then its reversal by codepoint
2798     /// is also guaranteed to be valid UTF-8.
2799     ///
2800     /// This operation is equivalent to the following, but without allocating:
2801     ///
2802     /// ```
2803     /// use bstr::ByteSlice;
2804     ///
2805     /// let mut s = <Vec<u8>>::from("foo☃bar");
2806     ///
2807     /// let mut chars: Vec<char> = s.chars().collect();
2808     /// chars.reverse();
2809     ///
2810     /// let reversed: String = chars.into_iter().collect();
2811     /// assert_eq!(reversed, "rab☃oof");
2812     /// ```
2813     ///
2814     /// Note that this is not necessarily a well formed operation. For example,
2815     /// if this byte string contains grapheme clusters with more than one
2816     /// codepoint, then those grapheme clusters will not necessarily be
2817     /// preserved. If you'd like to preserve grapheme clusters, then use
2818     /// [`reverse_graphemes`](#method.reverse_graphemes) instead.
2819     ///
2820     /// # Examples
2821     ///
2822     /// Basic usage:
2823     ///
2824     /// ```
2825     /// use bstr::ByteSlice;
2826     ///
2827     /// let mut s = <Vec<u8>>::from("foo☃bar");
2828     /// s.reverse_chars();
2829     /// assert_eq!(s, "rab☃oof".as_bytes());
2830     /// ```
2831     ///
2832     /// This example shows that not all reversals lead to a well formed string.
2833     /// For example, in this case, combining marks are used to put accents over
2834     /// some letters, and those accent marks must appear after the codepoints
2835     /// they modify.
2836     ///
2837     /// ```
2838     /// use bstr::{B, ByteSlice};
2839     ///
2840     /// let mut s = <Vec<u8>>::from("résumé");
2841     /// s.reverse_chars();
2842     /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
2843     /// ```
2844     ///
2845     /// A word of warning: the above example relies on the fact that
2846     /// `résumé` is in decomposed normal form, which means there are separate
2847     /// codepoints for the accents above `e`. If it is instead in composed
2848     /// normal form, then the example works:
2849     ///
2850     /// ```
2851     /// use bstr::{B, ByteSlice};
2852     ///
2853     /// let mut s = <Vec<u8>>::from("résumé");
2854     /// s.reverse_chars();
2855     /// assert_eq!(s, B("émusér"));
2856     /// ```
2857     ///
2858     /// The point here is to be cautious and not assume that just because
2859     /// `reverse_chars` works in one case, that it therefore works in all
2860     /// cases.
2861     #[inline]
reverse_chars(&mut self)2862     fn reverse_chars(&mut self) {
2863         let mut i = 0;
2864         loop {
2865             let (_, size) = utf8::decode(&self.as_bytes()[i..]);
2866             if size == 0 {
2867                 break;
2868             }
2869             if size > 1 {
2870                 self.as_bytes_mut()[i..i + size].reverse_bytes();
2871             }
2872             i += size;
2873         }
2874         self.reverse_bytes();
2875     }
2876 
2877     /// Reverse the graphemes in this string, in place.
2878     ///
2879     /// If this byte string is valid UTF-8, then its reversal by grapheme
2880     /// is also guaranteed to be valid UTF-8.
2881     ///
2882     /// This operation is equivalent to the following, but without allocating:
2883     ///
2884     /// ```
2885     /// use bstr::ByteSlice;
2886     ///
2887     /// let mut s = <Vec<u8>>::from("foo☃bar");
2888     ///
2889     /// let mut graphemes: Vec<&str> = s.graphemes().collect();
2890     /// graphemes.reverse();
2891     ///
2892     /// let reversed = graphemes.concat();
2893     /// assert_eq!(reversed, "rab☃oof");
2894     /// ```
2895     ///
2896     /// # Examples
2897     ///
2898     /// Basic usage:
2899     ///
2900     /// ```
2901     /// use bstr::ByteSlice;
2902     ///
2903     /// let mut s = <Vec<u8>>::from("foo☃bar");
2904     /// s.reverse_graphemes();
2905     /// assert_eq!(s, "rab☃oof".as_bytes());
2906     /// ```
2907     ///
2908     /// This example shows how this correctly handles grapheme clusters,
2909     /// unlike `reverse_chars`.
2910     ///
2911     /// ```
2912     /// use bstr::ByteSlice;
2913     ///
2914     /// let mut s = <Vec<u8>>::from("résumé");
2915     /// s.reverse_graphemes();
2916     /// assert_eq!(s, "émusér".as_bytes());
2917     /// ```
2918     #[cfg(feature = "unicode")]
2919     #[inline]
reverse_graphemes(&mut self)2920     fn reverse_graphemes(&mut self) {
2921         use crate::unicode::decode_grapheme;
2922 
2923         let mut i = 0;
2924         loop {
2925             let (_, size) = decode_grapheme(&self.as_bytes()[i..]);
2926             if size == 0 {
2927                 break;
2928             }
2929             if size > 1 {
2930                 self.as_bytes_mut()[i..i + size].reverse_bytes();
2931             }
2932             i += size;
2933         }
2934         self.reverse_bytes();
2935     }
2936 
2937     /// Returns true if and only if every byte in this byte string is ASCII.
2938     ///
2939     /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
2940     /// an ASCII codepoint if and only if it is in the inclusive range
2941     /// `[0, 127]`.
2942     ///
2943     /// # Examples
2944     ///
2945     /// Basic usage:
2946     ///
2947     /// ```
2948     /// use bstr::{B, ByteSlice};
2949     ///
2950     /// assert!(B("abc").is_ascii());
2951     /// assert!(!B("☃βツ").is_ascii());
2952     /// assert!(!B(b"\xFF").is_ascii());
2953     /// ```
2954     #[inline]
is_ascii(&self) -> bool2955     fn is_ascii(&self) -> bool {
2956         ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
2957     }
2958 
2959     /// Returns true if and only if the entire byte string is valid UTF-8.
2960     ///
2961     /// If you need location information about where a byte string's first
2962     /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
2963     ///
2964     /// # Examples
2965     ///
2966     /// Basic usage:
2967     ///
2968     /// ```
2969     /// use bstr::{B, ByteSlice};
2970     ///
2971     /// assert!(B("abc").is_utf8());
2972     /// assert!(B("☃βツ").is_utf8());
2973     /// // invalid bytes
2974     /// assert!(!B(b"abc\xFF").is_utf8());
2975     /// // surrogate encoding
2976     /// assert!(!B(b"\xED\xA0\x80").is_utf8());
2977     /// // incomplete sequence
2978     /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
2979     /// // overlong sequence
2980     /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
2981     /// ```
2982     #[inline]
is_utf8(&self) -> bool2983     fn is_utf8(&self) -> bool {
2984         utf8::validate(self.as_bytes()).is_ok()
2985     }
2986 
2987     /// Returns the last byte in this byte string, if it's non-empty. If this
2988     /// byte string is empty, this returns `None`.
2989     ///
2990     /// Note that this is like the generic `[u8]::last`, except this returns
2991     /// the byte by value instead of a reference to the byte.
2992     ///
2993     /// # Examples
2994     ///
2995     /// Basic usage:
2996     ///
2997     /// ```
2998     /// use bstr::ByteSlice;
2999     ///
3000     /// assert_eq!(Some(b'z'), b"baz".last_byte());
3001     /// assert_eq!(None, b"".last_byte());
3002     /// ```
3003     #[inline]
last_byte(&self) -> Option<u8>3004     fn last_byte(&self) -> Option<u8> {
3005         let bytes = self.as_bytes();
3006         bytes.get(bytes.len().saturating_sub(1)).map(|&b| b)
3007     }
3008 
3009     /// Returns the index of the first non-ASCII byte in this byte string (if
3010     /// any such indices exist). Specifically, it returns the index of the
3011     /// first byte with a value greater than or equal to `0x80`.
3012     ///
3013     /// # Examples
3014     ///
3015     /// Basic usage:
3016     ///
3017     /// ```
3018     /// use bstr::{ByteSlice, B};
3019     ///
3020     /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
3021     /// assert_eq!(None, b"abcde".find_non_ascii_byte());
    /// assert_eq!(Some(0), B("😀").find_non_ascii_byte());
3023     /// ```
3024     #[inline]
find_non_ascii_byte(&self) -> Option<usize>3025     fn find_non_ascii_byte(&self) -> Option<usize> {
3026         let index = ascii::first_non_ascii_byte(self.as_bytes());
3027         if index == self.as_bytes().len() {
3028             None
3029         } else {
3030             Some(index)
3031         }
3032     }
3033 }
3034 
3035 /// A single substring searcher fixed to a particular needle.
3036 ///
3037 /// The purpose of this type is to permit callers to construct a substring
3038 /// searcher that can be used to search haystacks without the overhead of
3039 /// constructing the searcher in the first place. This is a somewhat niche
3040 /// concern when it's necessary to re-use the same needle to search multiple
3041 /// different haystacks with as little overhead as possible. In general, using
3042 /// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
3043 /// or
3044 /// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
3045 /// is good enough, but `Finder` is useful when you can meaningfully observe
3046 /// searcher construction time in a profile.
3047 ///
3048 /// When the `std` feature is enabled, then this type has an `into_owned`
3049 /// version which permits building a `Finder` that is not connected to the
3050 /// lifetime of its needle.
3051 #[derive(Clone, Debug)]
3052 pub struct Finder<'a>(memmem::Finder<'a>);
3053 
3054 impl<'a> Finder<'a> {
3055     /// Create a new finder for the given needle.
3056     #[inline]
new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a>3057     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
3058         Finder(memmem::Finder::new(needle.as_ref()))
3059     }
3060 
3061     /// Convert this finder into its owned variant, such that it no longer
3062     /// borrows the needle.
3063     ///
3064     /// If this is already an owned finder, then this is a no-op. Otherwise,
3065     /// this copies the needle.
3066     ///
3067     /// This is only available when the `std` feature is enabled.
3068     #[cfg(feature = "std")]
3069     #[inline]
into_owned(self) -> Finder<'static>3070     pub fn into_owned(self) -> Finder<'static> {
3071         Finder(self.0.into_owned())
3072     }
3073 
3074     /// Returns the needle that this finder searches for.
3075     ///
3076     /// Note that the lifetime of the needle returned is tied to the lifetime
3077     /// of the finder, and may be shorter than the `'a` lifetime. Namely, a
3078     /// finder's needle can be either borrowed or owned, so the lifetime of the
3079     /// needle returned must necessarily be the shorter of the two.
3080     #[inline]
needle(&self) -> &[u8]3081     pub fn needle(&self) -> &[u8] {
3082         self.0.needle()
3083     }
3084 
3085     /// Returns the index of the first occurrence of this needle in the given
3086     /// haystack.
3087     ///
3088     /// The haystack may be any type that can be cheaply converted into a
3089     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3090     ///
3091     /// # Complexity
3092     ///
3093     /// This routine is guaranteed to have worst case linear time complexity
3094     /// with respect to both the needle and the haystack. That is, this runs
3095     /// in `O(needle.len() + haystack.len())` time.
3096     ///
3097     /// This routine is also guaranteed to have worst case constant space
3098     /// complexity.
3099     ///
3100     /// # Examples
3101     ///
3102     /// Basic usage:
3103     ///
3104     /// ```
3105     /// use bstr::Finder;
3106     ///
3107     /// let haystack = "foo bar baz";
3108     /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
3109     /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
3110     /// assert_eq!(None, Finder::new("quux").find(haystack));
3111     /// ```
3112     #[inline]
find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize>3113     pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3114         self.0.find(haystack.as_ref())
3115     }
3116 }
3117 
3118 /// A single substring reverse searcher fixed to a particular needle.
3119 ///
3120 /// The purpose of this type is to permit callers to construct a substring
3121 /// searcher that can be used to search haystacks without the overhead of
3122 /// constructing the searcher in the first place. This is a somewhat niche
3123 /// concern when it's necessary to re-use the same needle to search multiple
3124 /// different haystacks with as little overhead as possible. In general, using
3125 /// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
3126 /// or
3127 /// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
3128 /// is good enough, but `FinderReverse` is useful when you can meaningfully
3129 /// observe searcher construction time in a profile.
3130 ///
3131 /// When the `std` feature is enabled, then this type has an `into_owned`
3132 /// version which permits building a `FinderReverse` that is not connected to
3133 /// the lifetime of its needle.
3134 #[derive(Clone, Debug)]
3135 pub struct FinderReverse<'a>(memmem::FinderRev<'a>);
3136 
3137 impl<'a> FinderReverse<'a> {
3138     /// Create a new reverse finder for the given needle.
3139     #[inline]
new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a>3140     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
3141         FinderReverse(memmem::FinderRev::new(needle.as_ref()))
3142     }
3143 
3144     /// Convert this finder into its owned variant, such that it no longer
3145     /// borrows the needle.
3146     ///
3147     /// If this is already an owned finder, then this is a no-op. Otherwise,
3148     /// this copies the needle.
3149     ///
3150     /// This is only available when the `std` feature is enabled.
3151     #[cfg(feature = "std")]
3152     #[inline]
into_owned(self) -> FinderReverse<'static>3153     pub fn into_owned(self) -> FinderReverse<'static> {
3154         FinderReverse(self.0.into_owned())
3155     }
3156 
3157     /// Returns the needle that this finder searches for.
3158     ///
3159     /// Note that the lifetime of the needle returned is tied to the lifetime
3160     /// of this finder, and may be shorter than the `'a` lifetime. Namely,
3161     /// a finder's needle can be either borrowed or owned, so the lifetime of
3162     /// the needle returned must necessarily be the shorter of the two.
3163     #[inline]
needle(&self) -> &[u8]3164     pub fn needle(&self) -> &[u8] {
3165         self.0.needle()
3166     }
3167 
3168     /// Returns the index of the last occurrence of this needle in the given
3169     /// haystack.
3170     ///
3171     /// The haystack may be any type that can be cheaply converted into a
3172     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3173     ///
3174     /// # Complexity
3175     ///
3176     /// This routine is guaranteed to have worst case linear time complexity
3177     /// with respect to both the needle and the haystack. That is, this runs
3178     /// in `O(needle.len() + haystack.len())` time.
3179     ///
3180     /// This routine is also guaranteed to have worst case constant space
3181     /// complexity.
3182     ///
3183     /// # Examples
3184     ///
3185     /// Basic usage:
3186     ///
3187     /// ```
3188     /// use bstr::FinderReverse;
3189     ///
3190     /// let haystack = "foo bar baz";
3191     /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack));
3192     /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack));
3193     /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
3194     /// ```
3195     #[inline]
rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize>3196     pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3197         self.0.rfind(haystack.as_ref())
3198     }
3199 }
3200 
3201 /// An iterator over non-overlapping substring matches.
3202 ///
3203 /// Matches are reported by the byte offset at which they begin.
3204 ///
3205 /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
3206 /// needle.
3207 #[derive(Debug)]
3208 pub struct Find<'h, 'n> {
3209     it: memmem::FindIter<'h, 'n>,
3210     haystack: &'h [u8],
3211     needle: &'n [u8],
3212 }
3213 
3214 impl<'h, 'n> Find<'h, 'n> {
new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n>3215     fn new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n> {
3216         Find { it: memmem::find_iter(haystack, needle), haystack, needle }
3217     }
3218 }
3219 
3220 impl<'h, 'n> Iterator for Find<'h, 'n> {
3221     type Item = usize;
3222 
3223     #[inline]
next(&mut self) -> Option<usize>3224     fn next(&mut self) -> Option<usize> {
3225         self.it.next()
3226     }
3227 }
3228 
3229 /// An iterator over non-overlapping substring matches in reverse.
3230 ///
3231 /// Matches are reported by the byte offset at which they begin.
3232 ///
3233 /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
3234 /// needle.
3235 #[derive(Debug)]
3236 pub struct FindReverse<'h, 'n> {
3237     it: memmem::FindRevIter<'h, 'n>,
3238     haystack: &'h [u8],
3239     needle: &'n [u8],
3240 }
3241 
3242 impl<'h, 'n> FindReverse<'h, 'n> {
new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n>3243     fn new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n> {
3244         FindReverse {
3245             it: memmem::rfind_iter(haystack, needle),
3246             haystack,
3247             needle,
3248         }
3249     }
3250 
haystack(&self) -> &'h [u8]3251     fn haystack(&self) -> &'h [u8] {
3252         self.haystack
3253     }
3254 
needle(&self) -> &'n [u8]3255     fn needle(&self) -> &'n [u8] {
3256         self.needle
3257     }
3258 }
3259 
3260 impl<'h, 'n> Iterator for FindReverse<'h, 'n> {
3261     type Item = usize;
3262 
3263     #[inline]
next(&mut self) -> Option<usize>3264     fn next(&mut self) -> Option<usize> {
3265         self.it.next()
3266     }
3267 }
3268 
3269 /// An iterator over the bytes in a byte string.
3270 ///
3271 /// `'a` is the lifetime of the byte string being traversed.
#[derive(Clone, Debug)]
pub struct Bytes<'a> {
    // The wrapped slice iterator; every method below delegates to it.
    it: slice::Iter<'a, u8>,
}

impl<'a> Bytes<'a> {
    /// Views the remaining underlying data as a subslice of the original data.
    /// This has the same lifetime as the original slice,
    /// and so the iterator can continue to be used while this exists.
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.it.as_slice()
    }
}

impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    /// Returns the next byte from the front, by value.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next().copied()
    }

    /// Forwards the size hint of the wrapped slice iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for Bytes<'a> {
    /// Returns the next byte from the back, by value.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back().copied()
    }
}

impl<'a> ExactSizeIterator for Bytes<'a> {
    /// Returns the number of bytes not yet yielded.
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}

impl<'a> iter::FusedIterator for Bytes<'a> {}
3316 
/// An iterator over the whitespace-separated fields of a byte string.
///
/// Whitespace for this iterator is defined by the Unicode property
/// `White_Space`.
///
/// Contiguous runs of whitespace act as a single separator, so the fields in
/// `foo\t\t\n  \nbar` are `foo` and `bar`.
///
/// `'a` is the lifetime of the byte string being split.
#[cfg(feature = "unicode")]
#[derive(Debug)]
pub struct Fields<'a> {
    /// A predicate-based field iterator specialised to a whitespace test.
    it: FieldsWith<'a, fn(char) -> bool>,
}

#[cfg(feature = "unicode")]
impl<'a> Fields<'a> {
    /// Creates a field iterator over `bytes` whose separator predicate is
    /// `char::is_whitespace`.
    fn new(bytes: &'a [u8]) -> Fields<'a> {
        // A non-capturing closure, stored as a plain function pointer so the
        // struct's field type can be named.
        let is_space: fn(char) -> bool = |ch: char| ch.is_whitespace();
        Self { it: bytes.fields_with(is_space) }
    }
}

#[cfg(feature = "unicode")]
impl<'a> Iterator for Fields<'a> {
    type Item = &'a [u8];

    /// Yields the next field by forwarding to the inner `FieldsWith`.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        self.it.next()
    }
}
3348 
3349 /// An iterator over fields in the byte string, separated by a predicate over
3350 /// codepoints.
3351 ///
3352 /// This iterator splits a byte string based on its predicate function such
3353 /// that the elements returned are separated by contiguous runs of codepoints
3354 /// for which the predicate returns true.
3355 ///
3356 /// `'a` is the lifetime of the byte string being split, while `F` is the type
3357 /// of the predicate, i.e., `FnMut(char) -> bool`.
3358 #[derive(Debug)]
3359 pub struct FieldsWith<'a, F> {
3360     f: F,
3361     bytes: &'a [u8],
3362     chars: CharIndices<'a>,
3363 }
3364 
3365 impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F>3366     fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
3367         FieldsWith { f, bytes, chars: bytes.char_indices() }
3368     }
3369 }
3370 
3371 impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
3372     type Item = &'a [u8];
3373 
3374     #[inline]
next(&mut self) -> Option<&'a [u8]>3375     fn next(&mut self) -> Option<&'a [u8]> {
3376         let (start, mut end);
3377         loop {
3378             match self.chars.next() {
3379                 None => return None,
3380                 Some((s, e, ch)) => {
3381                     if !(self.f)(ch) {
3382                         start = s;
3383                         end = e;
3384                         break;
3385                     }
3386                 }
3387             }
3388         }
3389         while let Some((_, e, ch)) = self.chars.next() {
3390             if (self.f)(ch) {
3391                 break;
3392             }
3393             end = e;
3394         }
3395         Some(&self.bytes[start..end])
3396     }
3397 }
3398 
3399 /// An iterator over substrings in a byte string, split by a separator.
3400 ///
3401 /// `'h` is the lifetime of the byte string being split (the haystack), while
3402 /// `'s` is the lifetime of the byte string doing the splitting.
3403 #[derive(Debug)]
3404 pub struct Split<'h, 's> {
3405     finder: Find<'h, 's>,
3406     /// The end position of the previous match of our splitter. The element
3407     /// we yield corresponds to the substring starting at `last` up to the
3408     /// beginning of the next match of the splitter.
3409     last: usize,
3410     /// Only set when iteration is complete. A corner case here is when a
3411     /// splitter is matched at the end of the haystack. At that point, we still
3412     /// need to yield an empty string following it.
3413     done: bool,
3414 }
3415 
3416 impl<'h, 's> Split<'h, 's> {
new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's>3417     fn new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's> {
3418         let finder = haystack.find_iter(splitter);
3419         Split { finder, last: 0, done: false }
3420     }
3421 }
3422 
3423 impl<'h, 's> Iterator for Split<'h, 's> {
3424     type Item = &'h [u8];
3425 
3426     #[inline]
next(&mut self) -> Option<&'h [u8]>3427     fn next(&mut self) -> Option<&'h [u8]> {
3428         let haystack = self.finder.haystack;
3429         match self.finder.next() {
3430             Some(start) => {
3431                 let next = &haystack[self.last..start];
3432                 self.last = start + self.finder.needle.len();
3433                 Some(next)
3434             }
3435             None => {
3436                 if self.last >= haystack.len() {
3437                     if !self.done {
3438                         self.done = true;
3439                         Some(b"")
3440                     } else {
3441                         None
3442                     }
3443                 } else {
3444                     let s = &haystack[self.last..];
3445                     self.last = haystack.len();
3446                     self.done = true;
3447                     Some(s)
3448                 }
3449             }
3450         }
3451     }
3452 }
3453 
3454 /// An iterator over substrings in a byte string, split by a separator, in
3455 /// reverse.
3456 ///
3457 /// `'h` is the lifetime of the byte string being split (the haystack), while
3458 /// `'s` is the lifetime of the byte string doing the splitting.
3459 #[derive(Debug)]
3460 pub struct SplitReverse<'h, 's> {
3461     finder: FindReverse<'h, 's>,
3462     /// The end position of the previous match of our splitter. The element
3463     /// we yield corresponds to the substring starting at `last` up to the
3464     /// beginning of the next match of the splitter.
3465     last: usize,
3466     /// Only set when iteration is complete. A corner case here is when a
3467     /// splitter is matched at the end of the haystack. At that point, we still
3468     /// need to yield an empty string following it.
3469     done: bool,
3470 }
3471 
3472 impl<'h, 's> SplitReverse<'h, 's> {
new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's>3473     fn new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's> {
3474         let finder = haystack.rfind_iter(splitter);
3475         SplitReverse { finder, last: haystack.len(), done: false }
3476     }
3477 }
3478 
3479 impl<'h, 's> Iterator for SplitReverse<'h, 's> {
3480     type Item = &'h [u8];
3481 
3482     #[inline]
next(&mut self) -> Option<&'h [u8]>3483     fn next(&mut self) -> Option<&'h [u8]> {
3484         let haystack = self.finder.haystack();
3485         match self.finder.next() {
3486             Some(start) => {
3487                 let nlen = self.finder.needle().len();
3488                 let next = &haystack[start + nlen..self.last];
3489                 self.last = start;
3490                 Some(next)
3491             }
3492             None => {
3493                 if self.last == 0 {
3494                     if !self.done {
3495                         self.done = true;
3496                         Some(b"")
3497                     } else {
3498                         None
3499                     }
3500                 } else {
3501                     let s = &haystack[..self.last];
3502                     self.last = 0;
3503                     self.done = true;
3504                     Some(s)
3505                 }
3506             }
3507         }
3508     }
3509 }
3510 
3511 /// An iterator over at most `n` substrings in a byte string, split by a
3512 /// separator.
3513 ///
3514 /// `'h` is the lifetime of the byte string being split (the haystack), while
3515 /// `'s` is the lifetime of the byte string doing the splitting.
3516 #[derive(Debug)]
3517 pub struct SplitN<'h, 's> {
3518     split: Split<'h, 's>,
3519     limit: usize,
3520     count: usize,
3521 }
3522 
3523 impl<'h, 's> SplitN<'h, 's> {
new( haystack: &'h [u8], splitter: &'s [u8], limit: usize, ) -> SplitN<'h, 's>3524     fn new(
3525         haystack: &'h [u8],
3526         splitter: &'s [u8],
3527         limit: usize,
3528     ) -> SplitN<'h, 's> {
3529         let split = haystack.split_str(splitter);
3530         SplitN { split, limit, count: 0 }
3531     }
3532 }
3533 
3534 impl<'h, 's> Iterator for SplitN<'h, 's> {
3535     type Item = &'h [u8];
3536 
3537     #[inline]
next(&mut self) -> Option<&'h [u8]>3538     fn next(&mut self) -> Option<&'h [u8]> {
3539         self.count += 1;
3540         if self.count > self.limit || self.split.done {
3541             None
3542         } else if self.count == self.limit {
3543             Some(&self.split.finder.haystack[self.split.last..])
3544         } else {
3545             self.split.next()
3546         }
3547     }
3548 }
3549 
3550 /// An iterator over at most `n` substrings in a byte string, split by a
3551 /// separator, in reverse.
3552 ///
3553 /// `'h` is the lifetime of the byte string being split (the haystack), while
3554 /// `'s` is the lifetime of the byte string doing the splitting.
3555 #[derive(Debug)]
3556 pub struct SplitNReverse<'h, 's> {
3557     split: SplitReverse<'h, 's>,
3558     limit: usize,
3559     count: usize,
3560 }
3561 
3562 impl<'h, 's> SplitNReverse<'h, 's> {
new( haystack: &'h [u8], splitter: &'s [u8], limit: usize, ) -> SplitNReverse<'h, 's>3563     fn new(
3564         haystack: &'h [u8],
3565         splitter: &'s [u8],
3566         limit: usize,
3567     ) -> SplitNReverse<'h, 's> {
3568         let split = haystack.rsplit_str(splitter);
3569         SplitNReverse { split, limit, count: 0 }
3570     }
3571 }
3572 
3573 impl<'h, 's> Iterator for SplitNReverse<'h, 's> {
3574     type Item = &'h [u8];
3575 
3576     #[inline]
next(&mut self) -> Option<&'h [u8]>3577     fn next(&mut self) -> Option<&'h [u8]> {
3578         self.count += 1;
3579         if self.count > self.limit || self.split.done {
3580             None
3581         } else if self.count == self.limit {
3582             Some(&self.split.finder.haystack()[..self.split.last])
3583         } else {
3584             self.split.next()
3585         }
3586     }
3587 }
3588 
3589 /// An iterator over all lines in a byte string, without their terminators.
3590 ///
3591 /// For this iterator, the only line terminators recognized are `\r\n` and
3592 /// `\n`.
3593 ///
3594 /// `'a` is the lifetime of the byte string being iterated over.
3595 #[derive(Clone, Debug)]
3596 pub struct Lines<'a> {
3597     it: LinesWithTerminator<'a>,
3598 }
3599 
3600 impl<'a> Lines<'a> {
new(bytes: &'a [u8]) -> Lines<'a>3601     fn new(bytes: &'a [u8]) -> Lines<'a> {
3602         Lines { it: LinesWithTerminator::new(bytes) }
3603     }
3604 
3605     /// Return a copy of the rest of the underlying bytes without affecting the
3606     /// iterator itself.
3607     ///
3608     /// # Examples
3609     ///
3610     /// Basic usage:
3611     ///
3612     /// ```
3613     /// use bstr::{B, ByteSlice};
3614     ///
3615     /// let s = b"\
3616     /// foo
3617     /// bar\r
3618     /// baz";
3619     /// let mut lines = s.lines();
3620     /// assert_eq!(lines.next(), Some(B("foo")));
3621     /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
3622     /// ```
as_bytes(&self) -> &'a [u8]3623     pub fn as_bytes(&self) -> &'a [u8] {
3624         self.it.bytes
3625     }
3626 }
3627 
3628 impl<'a> Iterator for Lines<'a> {
3629     type Item = &'a [u8];
3630 
3631     #[inline]
next(&mut self) -> Option<&'a [u8]>3632     fn next(&mut self) -> Option<&'a [u8]> {
3633         Some(trim_last_terminator(self.it.next()?))
3634     }
3635 }
3636 
3637 impl<'a> DoubleEndedIterator for Lines<'a> {
3638     #[inline]
next_back(&mut self) -> Option<Self::Item>3639     fn next_back(&mut self) -> Option<Self::Item> {
3640         Some(trim_last_terminator(self.it.next_back()?))
3641     }
3642 }
3643 
3644 impl<'a> iter::FusedIterator for Lines<'a> {}
3645 
3646 /// An iterator over all lines in a byte string, including their terminators.
3647 ///
3648 /// For this iterator, the only line terminator recognized is `\n`. (Since
3649 /// line terminators are included, this also handles `\r\n` line endings.)
3650 ///
3651 /// Line terminators are only included if they are present in the original
3652 /// byte string. For example, the last line in a byte string may not end with
3653 /// a line terminator.
3654 ///
3655 /// Concatenating all elements yielded by this iterator is guaranteed to yield
3656 /// the original byte string.
3657 ///
3658 /// `'a` is the lifetime of the byte string being iterated over.
3659 #[derive(Clone, Debug)]
3660 pub struct LinesWithTerminator<'a> {
3661     bytes: &'a [u8],
3662 }
3663 
3664 impl<'a> LinesWithTerminator<'a> {
new(bytes: &'a [u8]) -> LinesWithTerminator<'a>3665     fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
3666         LinesWithTerminator { bytes }
3667     }
3668 
3669     /// Return a copy of the rest of the underlying bytes without affecting the
3670     /// iterator itself.
3671     ///
3672     /// # Examples
3673     ///
3674     /// Basic usage:
3675     ///
3676     /// ```
3677     /// use bstr::{B, ByteSlice};
3678     ///
3679     /// let s = b"\
3680     /// foo
3681     /// bar\r
3682     /// baz";
3683     /// let mut lines = s.lines_with_terminator();
3684     /// assert_eq!(lines.next(), Some(B("foo\n")));
3685     /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
3686     /// ```
as_bytes(&self) -> &'a [u8]3687     pub fn as_bytes(&self) -> &'a [u8] {
3688         self.bytes
3689     }
3690 }
3691 
3692 impl<'a> Iterator for LinesWithTerminator<'a> {
3693     type Item = &'a [u8];
3694 
3695     #[inline]
next(&mut self) -> Option<&'a [u8]>3696     fn next(&mut self) -> Option<&'a [u8]> {
3697         match self.bytes.find_byte(b'\n') {
3698             None if self.bytes.is_empty() => None,
3699             None => {
3700                 let line = self.bytes;
3701                 self.bytes = b"";
3702                 Some(line)
3703             }
3704             Some(end) => {
3705                 let line = &self.bytes[..end + 1];
3706                 self.bytes = &self.bytes[end + 1..];
3707                 Some(line)
3708             }
3709         }
3710     }
3711 }
3712 
3713 impl<'a> DoubleEndedIterator for LinesWithTerminator<'a> {
3714     #[inline]
next_back(&mut self) -> Option<Self::Item>3715     fn next_back(&mut self) -> Option<Self::Item> {
3716         let end = self.bytes.len().checked_sub(1)?;
3717         match self.bytes[..end].rfind_byte(b'\n') {
3718             None => {
3719                 let line = self.bytes;
3720                 self.bytes = b"";
3721                 Some(line)
3722             }
3723             Some(end) => {
3724                 let line = &self.bytes[end + 1..];
3725                 self.bytes = &self.bytes[..end + 1];
3726                 Some(line)
3727             }
3728         }
3729     }
3730 }
3731 
3732 impl<'a> iter::FusedIterator for LinesWithTerminator<'a> {}
3733 
/// Removes a single trailing line terminator from `s`, if there is one.
///
/// A trailing `\r\n` or a trailing `\n` is stripped. A `\r` that is not
/// immediately followed by the final `\n` is left alone, and at most one
/// terminator is removed. Input without a trailing `\n` is returned
/// unchanged.
fn trim_last_terminator(s: &[u8]) -> &[u8] {
    // The alternatives are tried left to right, so `\r\n` is preferred over a
    // bare `\n` and the carriage return is dropped together with the newline.
    match s {
        [line @ .., b'\r', b'\n'] | [line @ .., b'\n'] => line,
        _ => s,
    }
}
3743 
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::{
        ext_slice::{ByteSlice, Lines, LinesWithTerminator, B},
        tests::LOSSY_TESTS,
    };

    /// Checks every lossy-conversion fixture against `to_str_lossy`,
    /// `to_str_lossy_into` and the standard library's `from_utf8_lossy`.
    #[test]
    fn to_str_lossy() {
        for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
            let want = expected.as_bytes();

            let borrowed = B(input).to_str_lossy();
            assert_eq!(
                want,
                borrowed.as_bytes(),
                "to_str_lossy(ith: {:?}, given: {:?})",
                i,
                input,
            );

            let mut appended = String::new();
            B(input).to_str_lossy_into(&mut appended);
            assert_eq!(want, appended.as_bytes(), "to_str_lossy_into");

            let from_std = String::from_utf8_lossy(input);
            assert_eq!(want, from_std.as_bytes(), "std");
        }
    }

    /// Checks that both line iterators produce the expected lines when
    /// driven from the front and, mirrored, when driven from the back.
    #[test]
    fn lines_iteration() {
        /// Asserts that `it` yields `forward` front to back and the reverse
        /// of `forward` back to front.
        fn check<'a, I>(it: I, forward: &[&'a [u8]])
        where
            I: Clone + DoubleEndedIterator<Item = &'a [u8]>,
        {
            let mut expected: Vec<&[u8]> = forward.to_vec();
            assert_eq!(it.clone().collect::<Vec<_>>(), expected);
            expected.reverse();
            assert_eq!(it.rev().collect::<Vec<_>>(), expected);
        }

        check(Lines::new(b""), &[]);
        check(LinesWithTerminator::new(b""), &[]);

        check(Lines::new(b"\n"), &[B("")]);
        check(Lines::new(b"\r\n"), &[B("")]);
        check(LinesWithTerminator::new(b"\n"), &[B("\n")]);

        check(Lines::new(b"a"), &[B("a")]);
        check(LinesWithTerminator::new(b"a"), &[B("a")]);

        check(Lines::new(b"abc"), &[B("abc")]);
        check(LinesWithTerminator::new(b"abc"), &[B("abc")]);

        check(Lines::new(b"abc\n"), &[B("abc")]);
        check(Lines::new(b"abc\r\n"), &[B("abc")]);
        check(LinesWithTerminator::new(b"abc\n"), &[B("abc\n")]);

        check(Lines::new(b"abc\n\n"), &[B("abc"), B("")]);
        check(LinesWithTerminator::new(b"abc\n\n"), &[B("abc\n"), B("\n")]);

        check(Lines::new(b"abc\n\ndef"), &[B("abc"), B(""), B("def")]);
        check(
            LinesWithTerminator::new(b"abc\n\ndef"),
            &[B("abc\n"), B("\n"), B("def")],
        );

        check(Lines::new(b"abc\n\ndef\n"), &[B("abc"), B(""), B("def")]);
        check(
            LinesWithTerminator::new(b"abc\n\ndef\n"),
            &[B("abc\n"), B("\n"), B("def\n")],
        );

        check(Lines::new(b"\na\nb\n"), &[B(""), B("a"), B("b")]);
        check(
            LinesWithTerminator::new(b"\na\nb\n"),
            &[B("\n"), B("a\n"), B("b\n")],
        );

        check(Lines::new(b"\n\n\n"), &[B(""), B(""), B("")]);
        check(
            LinesWithTerminator::new(b"\n\n\n"),
            &[B("\n"), B("\n"), B("\n")],
        );
    }
}
3829