• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use core::fmt;
2 use core::iter;
3 use core::ops;
4 use core::ptr;
5 
6 use alloc::{borrow::Cow, string::String, vec, vec::Vec};
7 
8 #[cfg(feature = "std")]
9 use std::{
10     error,
11     ffi::{OsStr, OsString},
12     path::{Path, PathBuf},
13 };
14 
15 use crate::{
16     ext_slice::ByteSlice,
17     utf8::{self, Utf8Error},
18 };
19 
/// Concatenate the elements given by the iterator together into a single
/// `Vec<u8>`.
///
/// The elements may be any type that can be cheaply converted into an `&[u8]`.
/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
///
/// An empty iterator yields an empty `Vec<u8>`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::concat(&["foo", "bar", "baz"]);
/// assert_eq!(s, "foobarbaz".as_bytes());
/// ```
#[inline]
pub fn concat<T, I>(elements: I) -> Vec<u8>
where
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    let mut dest = Vec::new();
    for element in elements {
        // Same operation as `ByteVec::push_str`, spelled with the inherent
        // `Vec` method so this function has no dependency on the trait.
        dest.extend_from_slice(element.as_ref());
    }
    dest
}
48 
/// Join the elements given by the iterator with the given separator into a
/// single `Vec<u8>`.
///
/// Both the separator and the elements may be any type that can be cheaply
/// converted into an `&[u8]`. This includes, but is not limited to,
/// `&str`, `&BStr` and `&[u8]` itself.
///
/// The separator is written only *between* consecutive elements, never
/// before the first or after the last. An empty iterator yields an empty
/// `Vec<u8>`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::join(",", &["foo", "bar", "baz"]);
/// assert_eq!(s, "foo,bar,baz".as_bytes());
/// ```
#[inline]
pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
where
    B: AsRef<[u8]>,
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    // Resolve the separator to bytes once instead of on every iteration.
    let sep: &[u8] = separator.as_ref();
    let mut dest = Vec::new();
    for (index, element) in elements.into_iter().enumerate() {
        if index > 0 {
            dest.extend_from_slice(sep);
        }
        dest.extend_from_slice(element.as_ref());
    }
    dest
}
87 
88 impl ByteVec for Vec<u8> {
89     #[inline]
as_vec(&self) -> &Vec<u8>90     fn as_vec(&self) -> &Vec<u8> {
91         self
92     }
93 
94     #[inline]
as_vec_mut(&mut self) -> &mut Vec<u8>95     fn as_vec_mut(&mut self) -> &mut Vec<u8> {
96         self
97     }
98 
99     #[inline]
into_vec(self) -> Vec<u8>100     fn into_vec(self) -> Vec<u8> {
101         self
102     }
103 }
104 
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
///
/// The module itself is private, so `Sealed` cannot be named (and therefore
/// cannot be implemented) from outside this crate.
mod private {
    /// Marker super trait of `ByteVec`; it has no methods.
    pub trait Sealed {}
}
impl private::Sealed for Vec<u8> {}
111 
112 /// A trait that extends `Vec<u8>` with string oriented methods.
113 ///
114 /// Note that when using the constructor methods, such as
115 /// `ByteVec::from_slice`, one should actually call them using the concrete
116 /// type. For example:
117 ///
118 /// ```
119 /// use bstr::{B, ByteVec};
120 ///
121 /// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
122 /// assert_eq!(s, B("abc"));
123 /// ```
124 ///
125 /// This trait is sealed and cannot be implemented outside of `bstr`.
126 pub trait ByteVec: private::Sealed {
127     /// A method for accessing the raw vector bytes of this type. This is
128     /// always a no-op and callers shouldn't care about it. This only exists
129     /// for making the extension trait work.
130     #[doc(hidden)]
as_vec(&self) -> &Vec<u8>131     fn as_vec(&self) -> &Vec<u8>;
132 
133     /// A method for accessing the raw vector bytes of this type, mutably. This
134     /// is always a no-op and callers shouldn't care about it. This only exists
135     /// for making the extension trait work.
136     #[doc(hidden)]
as_vec_mut(&mut self) -> &mut Vec<u8>137     fn as_vec_mut(&mut self) -> &mut Vec<u8>;
138 
139     /// A method for consuming ownership of this vector. This is always a no-op
140     /// and callers shouldn't care about it. This only exists for making the
141     /// extension trait work.
142     #[doc(hidden)]
into_vec(self) -> Vec<u8> where Self: Sized143     fn into_vec(self) -> Vec<u8>
144     where
145         Self: Sized;
146 
147     /// Create a new owned byte string from the given byte slice.
148     ///
149     /// # Examples
150     ///
151     /// Basic usage:
152     ///
153     /// ```
154     /// use bstr::{B, ByteVec};
155     ///
156     /// let s = Vec::from_slice(b"abc");
157     /// assert_eq!(s, B("abc"));
158     /// ```
159     #[inline]
from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8>160     fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
161         bytes.as_ref().to_vec()
162     }
163 
164     /// Create a new byte string from an owned OS string.
165     ///
166     /// When the underlying bytes of OS strings are accessible, then this
167     /// always succeeds and is zero cost. Otherwise, this returns the given
168     /// `OsString` if it is not valid UTF-8.
169     ///
170     /// # Examples
171     ///
172     /// Basic usage:
173     ///
174     /// ```
175     /// use std::ffi::OsString;
176     ///
177     /// use bstr::{B, ByteVec};
178     ///
179     /// let os_str = OsString::from("foo");
180     /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
181     /// assert_eq!(bs, B("foo"));
182     /// ```
183     #[inline]
184     #[cfg(feature = "std")]
from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString>185     fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
186         #[cfg(unix)]
187         #[inline]
188         fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
189             use std::os::unix::ffi::OsStringExt;
190 
191             Ok(Vec::from(os_str.into_vec()))
192         }
193 
194         #[cfg(not(unix))]
195         #[inline]
196         fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
197             os_str.into_string().map(Vec::from)
198         }
199 
200         imp(os_str)
201     }
202 
203     /// Lossily create a new byte string from an OS string slice.
204     ///
205     /// When the underlying bytes of OS strings are accessible, then this is
206     /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
207     /// performed and if the given OS string is not valid UTF-8, then it is
208     /// lossily decoded into valid UTF-8 (with invalid bytes replaced by the
209     /// Unicode replacement codepoint).
210     ///
211     /// # Examples
212     ///
213     /// Basic usage:
214     ///
215     /// ```
216     /// use std::ffi::OsStr;
217     ///
218     /// use bstr::{B, ByteVec};
219     ///
220     /// let os_str = OsStr::new("foo");
221     /// let bs = Vec::from_os_str_lossy(os_str);
222     /// assert_eq!(bs, B("foo"));
223     /// ```
224     #[inline]
225     #[cfg(feature = "std")]
from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]>226     fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
227         #[cfg(unix)]
228         #[inline]
229         fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
230             use std::os::unix::ffi::OsStrExt;
231 
232             Cow::Borrowed(os_str.as_bytes())
233         }
234 
235         #[cfg(not(unix))]
236         #[inline]
237         fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
238             match os_str.to_string_lossy() {
239                 Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()),
240                 Cow::Owned(x) => Cow::Owned(Vec::from(x)),
241             }
242         }
243 
244         imp(os_str)
245     }
246 
247     /// Create a new byte string from an owned file path.
248     ///
249     /// When the underlying bytes of paths are accessible, then this always
250     /// succeeds and is zero cost. Otherwise, this returns the given `PathBuf`
251     /// if it is not valid UTF-8.
252     ///
253     /// # Examples
254     ///
255     /// Basic usage:
256     ///
257     /// ```
258     /// use std::path::PathBuf;
259     ///
260     /// use bstr::{B, ByteVec};
261     ///
262     /// let path = PathBuf::from("foo");
263     /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
264     /// assert_eq!(bs, B("foo"));
265     /// ```
266     #[inline]
267     #[cfg(feature = "std")]
from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf>268     fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
269         Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
270     }
271 
272     /// Lossily create a new byte string from a file path.
273     ///
274     /// When the underlying bytes of paths are accessible, then this is
275     /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
276     /// performed and if the given path is not valid UTF-8, then it is lossily
277     /// decoded into valid UTF-8 (with invalid bytes replaced by the Unicode
278     /// replacement codepoint).
279     ///
280     /// # Examples
281     ///
282     /// Basic usage:
283     ///
284     /// ```
285     /// use std::path::Path;
286     ///
287     /// use bstr::{B, ByteVec};
288     ///
289     /// let path = Path::new("foo");
290     /// let bs = Vec::from_path_lossy(path);
291     /// assert_eq!(bs, B("foo"));
292     /// ```
293     #[inline]
294     #[cfg(feature = "std")]
from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]>295     fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
296         Vec::from_os_str_lossy(path.as_os_str())
297     }
298 
299     /// Appends the given byte to the end of this byte string.
300     ///
301     /// Note that this is equivalent to the generic `Vec::push` method. This
302     /// method is provided to permit callers to explicitly differentiate
303     /// between pushing bytes, codepoints and strings.
304     ///
305     /// # Examples
306     ///
307     /// Basic usage:
308     ///
309     /// ```
310     /// use bstr::ByteVec;
311     ///
312     /// let mut s = <Vec<u8>>::from("abc");
313     /// s.push_byte(b'\xE2');
314     /// s.push_byte(b'\x98');
315     /// s.push_byte(b'\x83');
316     /// assert_eq!(s, "abc☃".as_bytes());
317     /// ```
318     #[inline]
push_byte(&mut self, byte: u8)319     fn push_byte(&mut self, byte: u8) {
320         self.as_vec_mut().push(byte);
321     }
322 
323     /// Appends the given `char` to the end of this byte string.
324     ///
325     /// # Examples
326     ///
327     /// Basic usage:
328     ///
329     /// ```
330     /// use bstr::ByteVec;
331     ///
332     /// let mut s = <Vec<u8>>::from("abc");
333     /// s.push_char('1');
334     /// s.push_char('2');
335     /// s.push_char('3');
336     /// assert_eq!(s, "abc123".as_bytes());
337     /// ```
338     #[inline]
push_char(&mut self, ch: char)339     fn push_char(&mut self, ch: char) {
340         if ch.len_utf8() == 1 {
341             self.push_byte(ch as u8);
342             return;
343         }
344         self.as_vec_mut()
345             .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
346     }
347 
348     /// Appends the given slice to the end of this byte string. This accepts
349     /// any type that be converted to a `&[u8]`. This includes, but is not
350     /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
351     ///
352     /// # Examples
353     ///
354     /// Basic usage:
355     ///
356     /// ```
357     /// use bstr::ByteVec;
358     ///
359     /// let mut s = <Vec<u8>>::from("abc");
360     /// s.push_str(b"123");
361     /// assert_eq!(s, "abc123".as_bytes());
362     /// ```
363     #[inline]
push_str<B: AsRef<[u8]>>(&mut self, bytes: B)364     fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
365         self.as_vec_mut().extend_from_slice(bytes.as_ref());
366     }
367 
368     /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
369     /// valid UTF-8.
370     ///
371     /// If it is not valid UTF-8, then a
372     /// [`FromUtf8Error`](struct.FromUtf8Error.html)
373     /// is returned. (This error can be used to examine why UTF-8 validation
374     /// failed, or to regain the original byte string.)
375     ///
376     /// # Examples
377     ///
378     /// Basic usage:
379     ///
380     /// ```
381     /// use bstr::ByteVec;
382     ///
383     /// let bytes = Vec::from("hello");
384     /// let string = bytes.into_string().unwrap();
385     ///
386     /// assert_eq!("hello", string);
387     /// ```
388     ///
389     /// If this byte string is not valid UTF-8, then an error will be returned.
390     /// That error can then be used to inspect the location at which invalid
391     /// UTF-8 was found, or to regain the original byte string:
392     ///
393     /// ```
394     /// use bstr::{B, ByteVec};
395     ///
396     /// let bytes = Vec::from_slice(b"foo\xFFbar");
397     /// let err = bytes.into_string().unwrap_err();
398     ///
399     /// assert_eq!(err.utf8_error().valid_up_to(), 3);
400     /// assert_eq!(err.utf8_error().error_len(), Some(1));
401     ///
402     /// // At no point in this example is an allocation performed.
403     /// let bytes = Vec::from(err.into_vec());
404     /// assert_eq!(bytes, B(b"foo\xFFbar"));
405     /// ```
406     #[inline]
into_string(self) -> Result<String, FromUtf8Error> where Self: Sized,407     fn into_string(self) -> Result<String, FromUtf8Error>
408     where
409         Self: Sized,
410     {
411         match utf8::validate(self.as_vec()) {
412             Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
413             Ok(()) => {
414                 // SAFETY: This is safe because of the guarantees provided by
415                 // utf8::validate.
416                 unsafe { Ok(self.into_string_unchecked()) }
417             }
418         }
419     }
420 
421     /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
422     /// contains invalid UTF-8, then the invalid bytes are replaced with the
423     /// Unicode replacement codepoint.
424     ///
425     /// # Examples
426     ///
427     /// Basic usage:
428     ///
429     /// ```
430     /// use bstr::ByteVec;
431     ///
432     /// let bytes = Vec::from_slice(b"foo\xFFbar");
433     /// let string = bytes.into_string_lossy();
434     /// assert_eq!(string, "foo\u{FFFD}bar");
435     /// ```
436     #[inline]
into_string_lossy(self) -> String where Self: Sized,437     fn into_string_lossy(self) -> String
438     where
439         Self: Sized,
440     {
441         match self.as_vec().to_str_lossy() {
442             Cow::Borrowed(_) => {
443                 // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
444                 // the entire string is valid utf8.
445                 unsafe { self.into_string_unchecked() }
446             }
447             Cow::Owned(s) => s,
448         }
449     }
450 
451     /// Unsafely convert this byte string into a `String`, without checking for
452     /// valid UTF-8.
453     ///
454     /// # Safety
455     ///
456     /// Callers *must* ensure that this byte string is valid UTF-8 before
457     /// calling this method. Converting a byte string into a `String` that is
458     /// not valid UTF-8 is considered undefined behavior.
459     ///
460     /// This routine is useful in performance sensitive contexts where the
461     /// UTF-8 validity of the byte string is already known and it is
462     /// undesirable to pay the cost of an additional UTF-8 validation check
463     /// that [`into_string`](#method.into_string) performs.
464     ///
465     /// # Examples
466     ///
467     /// Basic usage:
468     ///
469     /// ```
470     /// use bstr::ByteVec;
471     ///
472     /// // SAFETY: This is safe because string literals are guaranteed to be
473     /// // valid UTF-8 by the Rust compiler.
474     /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
475     /// assert_eq!("☃βツ", s);
476     /// ```
477     #[inline]
into_string_unchecked(self) -> String where Self: Sized,478     unsafe fn into_string_unchecked(self) -> String
479     where
480         Self: Sized,
481     {
482         String::from_utf8_unchecked(self.into_vec())
483     }
484 
485     /// Converts this byte string into an OS string, in place.
486     ///
487     /// When OS strings can be constructed from arbitrary byte sequences, this
488     /// always succeeds and is zero cost. Otherwise, if this byte string is not
489     /// valid UTF-8, then an error (with the original byte string) is returned.
490     ///
491     /// # Examples
492     ///
493     /// Basic usage:
494     ///
495     /// ```
496     /// use std::ffi::OsStr;
497     ///
498     /// use bstr::ByteVec;
499     ///
500     /// let bs = Vec::from("foo");
501     /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
502     /// assert_eq!(os_str, OsStr::new("foo"));
503     /// ```
504     #[cfg(feature = "std")]
505     #[inline]
into_os_string(self) -> Result<OsString, FromUtf8Error> where Self: Sized,506     fn into_os_string(self) -> Result<OsString, FromUtf8Error>
507     where
508         Self: Sized,
509     {
510         #[cfg(unix)]
511         #[inline]
512         fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
513             use std::os::unix::ffi::OsStringExt;
514 
515             Ok(OsString::from_vec(v))
516         }
517 
518         #[cfg(not(unix))]
519         #[inline]
520         fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
521             v.into_string().map(OsString::from)
522         }
523 
524         imp(self.into_vec())
525     }
526 
527     /// Lossily converts this byte string into an OS string, in place.
528     ///
529     /// When OS strings can be constructed from arbitrary byte sequences, this
530     /// is zero cost and always returns a slice. Otherwise, this will perform a
531     /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
532     /// the Unicode replacement codepoint.
533     ///
534     /// Note that this can prevent the correct roundtripping of file paths when
535     /// the representation of `OsString` is opaque.
536     ///
537     /// # Examples
538     ///
539     /// Basic usage:
540     ///
541     /// ```
542     /// use bstr::ByteVec;
543     ///
544     /// let bs = Vec::from_slice(b"foo\xFFbar");
545     /// let os_str = bs.into_os_string_lossy();
546     /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
547     /// ```
548     #[inline]
549     #[cfg(feature = "std")]
into_os_string_lossy(self) -> OsString where Self: Sized,550     fn into_os_string_lossy(self) -> OsString
551     where
552         Self: Sized,
553     {
554         #[cfg(unix)]
555         #[inline]
556         fn imp(v: Vec<u8>) -> OsString {
557             use std::os::unix::ffi::OsStringExt;
558 
559             OsString::from_vec(v)
560         }
561 
562         #[cfg(not(unix))]
563         #[inline]
564         fn imp(v: Vec<u8>) -> OsString {
565             OsString::from(v.into_string_lossy())
566         }
567 
568         imp(self.into_vec())
569     }
570 
571     /// Converts this byte string into an owned file path, in place.
572     ///
573     /// When paths can be constructed from arbitrary byte sequences, this
574     /// always succeeds and is zero cost. Otherwise, if this byte string is not
575     /// valid UTF-8, then an error (with the original byte string) is returned.
576     ///
577     /// # Examples
578     ///
579     /// Basic usage:
580     ///
581     /// ```
582     /// use bstr::ByteVec;
583     ///
584     /// let bs = Vec::from("foo");
585     /// let path = bs.into_path_buf().expect("should be valid UTF-8");
586     /// assert_eq!(path.as_os_str(), "foo");
587     /// ```
588     #[cfg(feature = "std")]
589     #[inline]
into_path_buf(self) -> Result<PathBuf, FromUtf8Error> where Self: Sized,590     fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error>
591     where
592         Self: Sized,
593     {
594         self.into_os_string().map(PathBuf::from)
595     }
596 
597     /// Lossily converts this byte string into an owned file path, in place.
598     ///
599     /// When paths can be constructed from arbitrary byte sequences, this is
600     /// zero cost and always returns a slice. Otherwise, this will perform a
601     /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
602     /// the Unicode replacement codepoint.
603     ///
604     /// Note that this can prevent the correct roundtripping of file paths when
605     /// the representation of `PathBuf` is opaque.
606     ///
607     /// # Examples
608     ///
609     /// Basic usage:
610     ///
611     /// ```
612     /// use bstr::ByteVec;
613     ///
614     /// let bs = Vec::from_slice(b"foo\xFFbar");
615     /// let path = bs.into_path_buf_lossy();
616     /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
617     /// ```
618     #[inline]
619     #[cfg(feature = "std")]
into_path_buf_lossy(self) -> PathBuf where Self: Sized,620     fn into_path_buf_lossy(self) -> PathBuf
621     where
622         Self: Sized,
623     {
624         PathBuf::from(self.into_os_string_lossy())
625     }
626 
627     /// Removes the last byte from this `Vec<u8>` and returns it.
628     ///
629     /// If this byte string is empty, then `None` is returned.
630     ///
631     /// If the last codepoint in this byte string is not ASCII, then removing
632     /// the last byte could make this byte string contain invalid UTF-8.
633     ///
634     /// Note that this is equivalent to the generic `Vec::pop` method. This
635     /// method is provided to permit callers to explicitly differentiate
636     /// between popping bytes and codepoints.
637     ///
638     /// # Examples
639     ///
640     /// Basic usage:
641     ///
642     /// ```
643     /// use bstr::ByteVec;
644     ///
645     /// let mut s = Vec::from("foo");
646     /// assert_eq!(s.pop_byte(), Some(b'o'));
647     /// assert_eq!(s.pop_byte(), Some(b'o'));
648     /// assert_eq!(s.pop_byte(), Some(b'f'));
649     /// assert_eq!(s.pop_byte(), None);
650     /// ```
651     #[inline]
pop_byte(&mut self) -> Option<u8>652     fn pop_byte(&mut self) -> Option<u8> {
653         self.as_vec_mut().pop()
654     }
655 
656     /// Removes the last codepoint from this `Vec<u8>` and returns it.
657     ///
658     /// If this byte string is empty, then `None` is returned. If the last
659     /// bytes of this byte string do not correspond to a valid UTF-8 code unit
660     /// sequence, then the Unicode replacement codepoint is yielded instead in
661     /// accordance with the
662     /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
663     ///
664     /// # Examples
665     ///
666     /// Basic usage:
667     ///
668     /// ```
669     /// use bstr::ByteVec;
670     ///
671     /// let mut s = Vec::from("foo");
672     /// assert_eq!(s.pop_char(), Some('o'));
673     /// assert_eq!(s.pop_char(), Some('o'));
674     /// assert_eq!(s.pop_char(), Some('f'));
675     /// assert_eq!(s.pop_char(), None);
676     /// ```
677     ///
678     /// This shows the replacement codepoint substitution policy. Note that
679     /// the first pop yields a replacement codepoint but actually removes two
680     /// bytes. This is in contrast with subsequent pops when encountering
681     /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
682     /// code unit sequence.
683     ///
684     /// ```
685     /// use bstr::ByteVec;
686     ///
687     /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
688     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
689     /// assert_eq!(s.pop_char(), Some('o'));
690     /// assert_eq!(s.pop_char(), Some('o'));
691     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
692     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
693     /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
694     /// assert_eq!(s.pop_char(), Some('f'));
695     /// assert_eq!(s.pop_char(), None);
696     /// ```
697     #[inline]
pop_char(&mut self) -> Option<char>698     fn pop_char(&mut self) -> Option<char> {
699         let (ch, size) = utf8::decode_last_lossy(self.as_vec());
700         if size == 0 {
701             return None;
702         }
703         let new_len = self.as_vec().len() - size;
704         self.as_vec_mut().truncate(new_len);
705         Some(ch)
706     }
707 
708     /// Removes a `char` from this `Vec<u8>` at the given byte position and
709     /// returns it.
710     ///
711     /// If the bytes at the given position do not lead to a valid UTF-8 code
712     /// unit sequence, then a
713     /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
714     ///
715     /// # Panics
716     ///
717     /// Panics if `at` is larger than or equal to this byte string's length.
718     ///
719     /// # Examples
720     ///
721     /// Basic usage:
722     ///
723     /// ```
724     /// use bstr::ByteVec;
725     ///
726     /// let mut s = Vec::from("foo☃bar");
727     /// assert_eq!(s.remove_char(3), '☃');
728     /// assert_eq!(s, b"foobar");
729     /// ```
730     ///
731     /// This example shows how the Unicode replacement codepoint policy is
732     /// used:
733     ///
734     /// ```
735     /// use bstr::ByteVec;
736     ///
737     /// let mut s = Vec::from_slice(b"foo\xFFbar");
738     /// assert_eq!(s.remove_char(3), '\u{FFFD}');
739     /// assert_eq!(s, b"foobar");
740     /// ```
741     #[inline]
remove_char(&mut self, at: usize) -> char742     fn remove_char(&mut self, at: usize) -> char {
743         let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
744         assert!(
745             size > 0,
746             "expected {} to be less than {}",
747             at,
748             self.as_vec().len(),
749         );
750         self.as_vec_mut().drain(at..at + size);
751         ch
752     }
753 
754     /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
755     /// position.
756     ///
757     /// This is an `O(n)` operation as it may copy a number of elements in this
758     /// byte string proportional to its length.
759     ///
760     /// # Panics
761     ///
762     /// Panics if `at` is larger than the byte string's length.
763     ///
764     /// # Examples
765     ///
766     /// Basic usage:
767     ///
768     /// ```
769     /// use bstr::ByteVec;
770     ///
771     /// let mut s = Vec::from("foobar");
772     /// s.insert_char(3, '☃');
773     /// assert_eq!(s, "foo☃bar".as_bytes());
774     /// ```
775     #[inline]
insert_char(&mut self, at: usize, ch: char)776     fn insert_char(&mut self, at: usize, ch: char) {
777         self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
778     }
779 
780     /// Inserts the given byte string into this byte string at a particular
781     /// byte position.
782     ///
783     /// This is an `O(n)` operation as it may copy a number of elements in this
784     /// byte string proportional to its length.
785     ///
786     /// The given byte string may be any type that can be cheaply converted
787     /// into a `&[u8]`. This includes, but is not limited to, `&str` and
788     /// `&[u8]`.
789     ///
790     /// # Panics
791     ///
792     /// Panics if `at` is larger than the byte string's length.
793     ///
794     /// # Examples
795     ///
796     /// Basic usage:
797     ///
798     /// ```
799     /// use bstr::ByteVec;
800     ///
801     /// let mut s = Vec::from("foobar");
802     /// s.insert_str(3, "☃☃☃");
803     /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
804     /// ```
805     #[inline]
insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B)806     fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
807         let bytes = bytes.as_ref();
808         let len = self.as_vec().len();
809         assert!(at <= len, "expected {} to be <= {}", at, len);
810 
811         // SAFETY: We'd like to efficiently splice in the given bytes into
812         // this byte string. Since we are only working with `u8` elements here,
813         // we only need to consider whether our bounds are correct and whether
814         // our byte string has enough space.
815         self.as_vec_mut().reserve(bytes.len());
816         unsafe {
817             // Shift bytes after `at` over by the length of `bytes` to make
818             // room for it. This requires referencing two regions of memory
819             // that may overlap, so we use ptr::copy.
820             ptr::copy(
821                 self.as_vec().as_ptr().add(at),
822                 self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
823                 len - at,
824             );
825             // Now copy the bytes given into the room we made above. In this
826             // case, we know that the given bytes cannot possibly overlap
827             // with this byte string since we have a mutable borrow of the
828             // latter. Thus, we can use a nonoverlapping copy.
829             ptr::copy_nonoverlapping(
830                 bytes.as_ptr(),
831                 self.as_vec_mut().as_mut_ptr().add(at),
832                 bytes.len(),
833             );
834             self.as_vec_mut().set_len(len + bytes.len());
835         }
836     }
837 
838     /// Removes the specified range in this byte string and replaces it with
839     /// the given bytes. The given bytes do not need to have the same length
840     /// as the range provided.
841     ///
842     /// # Panics
843     ///
844     /// Panics if the given range is invalid.
845     ///
846     /// # Examples
847     ///
848     /// Basic usage:
849     ///
850     /// ```
851     /// use bstr::ByteVec;
852     ///
853     /// let mut s = Vec::from("foobar");
854     /// s.replace_range(2..4, "xxxxx");
855     /// assert_eq!(s, "foxxxxxar".as_bytes());
856     /// ```
857     #[inline]
replace_range<R, B>(&mut self, range: R, replace_with: B) where R: ops::RangeBounds<usize>, B: AsRef<[u8]>,858     fn replace_range<R, B>(&mut self, range: R, replace_with: B)
859     where
860         R: ops::RangeBounds<usize>,
861         B: AsRef<[u8]>,
862     {
863         self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned());
864     }
865 
866     /// Creates a draining iterator that removes the specified range in this
867     /// `Vec<u8>` and yields each of the removed bytes.
868     ///
869     /// Note that the elements specified by the given range are removed
870     /// regardless of whether the returned iterator is fully exhausted.
871     ///
872     /// Also note that is is unspecified how many bytes are removed from the
873     /// `Vec<u8>` if the `DrainBytes` iterator is leaked.
874     ///
875     /// # Panics
876     ///
877     /// Panics if the given range is not valid.
878     ///
879     /// # Examples
880     ///
881     /// Basic usage:
882     ///
883     /// ```
884     /// use bstr::ByteVec;
885     ///
886     /// let mut s = Vec::from("foobar");
887     /// {
888     ///     let mut drainer = s.drain_bytes(2..4);
889     ///     assert_eq!(drainer.next(), Some(b'o'));
890     ///     assert_eq!(drainer.next(), Some(b'b'));
891     ///     assert_eq!(drainer.next(), None);
892     /// }
893     /// assert_eq!(s, "foar".as_bytes());
894     /// ```
895     #[inline]
drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_> where R: ops::RangeBounds<usize>,896     fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_>
897     where
898         R: ops::RangeBounds<usize>,
899     {
900         DrainBytes { it: self.as_vec_mut().drain(range) }
901     }
902 }
903 
/// A draining byte oriented iterator for `Vec<u8>`.
///
/// This iterator is created by
/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteVec;
///
/// let mut s = Vec::from("foobar");
/// {
///     let mut drainer = s.drain_bytes(2..4);
///     assert_eq!(drainer.next(), Some(b'o'));
///     assert_eq!(drainer.next(), Some(b'b'));
///     assert_eq!(drainer.next(), None);
/// }
/// assert_eq!(s, "foar".as_bytes());
/// ```
#[derive(Debug)]
pub struct DrainBytes<'a> {
    /// The underlying standard library drain that does all of the work.
    it: vec::Drain<'a, u8>,
}

impl iter::FusedIterator for DrainBytes<'_> {}

impl Iterator for DrainBytes<'_> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    /// Forwards the exact bounds of the wrapped drain. `ExactSizeIterator`
    /// requires `size_hint` to report the exact remaining length; the
    /// default implementation would report `(0, None)`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl DoubleEndedIterator for DrainBytes<'_> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl ExactSizeIterator for DrainBytes<'_> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
954 
955 /// An error that may occur when converting a `Vec<u8>` to a `String`.
956 ///
957 /// This error includes the original `Vec<u8>` that failed to convert to a
958 /// `String`. This permits callers to recover the allocation used even if it
959 /// it not valid UTF-8.
960 ///
961 /// # Examples
962 ///
963 /// Basic usage:
964 ///
965 /// ```
966 /// use bstr::{B, ByteVec};
967 ///
968 /// let bytes = Vec::from_slice(b"foo\xFFbar");
969 /// let err = bytes.into_string().unwrap_err();
970 ///
971 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
972 /// assert_eq!(err.utf8_error().error_len(), Some(1));
973 ///
974 /// // At no point in this example is an allocation performed.
975 /// let bytes = Vec::from(err.into_vec());
976 /// assert_eq!(bytes, B(b"foo\xFFbar"));
977 /// ```
978 #[derive(Debug, Eq, PartialEq)]
979 pub struct FromUtf8Error {
980     original: Vec<u8>,
981     err: Utf8Error,
982 }
983 
984 impl FromUtf8Error {
985     /// Return the original bytes as a slice that failed to convert to a
986     /// `String`.
987     ///
988     /// # Examples
989     ///
990     /// Basic usage:
991     ///
992     /// ```
993     /// use bstr::{B, ByteVec};
994     ///
995     /// let bytes = Vec::from_slice(b"foo\xFFbar");
996     /// let err = bytes.into_string().unwrap_err();
997     ///
998     /// // At no point in this example is an allocation performed.
999     /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
1000     /// ```
1001     #[inline]
as_bytes(&self) -> &[u8]1002     pub fn as_bytes(&self) -> &[u8] {
1003         &self.original
1004     }
1005 
1006     /// Consume this error and return the original byte string that failed to
1007     /// convert to a `String`.
1008     ///
1009     /// # Examples
1010     ///
1011     /// Basic usage:
1012     ///
1013     /// ```
1014     /// use bstr::{B, ByteVec};
1015     ///
1016     /// let bytes = Vec::from_slice(b"foo\xFFbar");
1017     /// let err = bytes.into_string().unwrap_err();
1018     /// let original = err.into_vec();
1019     ///
1020     /// // At no point in this example is an allocation performed.
1021     /// assert_eq!(original, B(b"foo\xFFbar"));
1022     /// ```
1023     #[inline]
into_vec(self) -> Vec<u8>1024     pub fn into_vec(self) -> Vec<u8> {
1025         self.original
1026     }
1027 
1028     /// Return the underlying UTF-8 error that occurred. This error provides
1029     /// information on the nature and location of the invalid UTF-8 detected.
1030     ///
1031     /// # Examples
1032     ///
1033     /// Basic usage:
1034     ///
1035     /// ```
1036     /// use bstr::{B, ByteVec};
1037     ///
1038     /// let bytes = Vec::from_slice(b"foo\xFFbar");
1039     /// let err = bytes.into_string().unwrap_err();
1040     ///
1041     /// assert_eq!(err.utf8_error().valid_up_to(), 3);
1042     /// assert_eq!(err.utf8_error().error_len(), Some(1));
1043     /// ```
1044     #[inline]
utf8_error(&self) -> &Utf8Error1045     pub fn utf8_error(&self) -> &Utf8Error {
1046         &self.err
1047     }
1048 }
1049 
#[cfg(feature = "std")]
impl error::Error for FromUtf8Error {
    /// Returns a fixed, short description of this error.
    #[inline]
    fn description(&self) -> &str {
        const DESCRIPTION: &str = "invalid UTF-8 vector";
        DESCRIPTION
    }
}
1057 
1058 impl fmt::Display for FromUtf8Error {
1059     #[inline]
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1060     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1061         write!(f, "{}", self.err)
1062     }
1063 }
1064 
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::ext_vec::ByteVec;

    /// Builds a byte vector from `initial`, inserts `text` at byte offset
    /// `at`, and returns the resulting vector.
    fn inserted(initial: &str, at: usize, text: &str) -> Vec<u8> {
        let mut s = Vec::from(initial);
        s.insert_str(at, text);
        s
    }

    #[test]
    fn insert() {
        // (initial contents, insertion offset, inserted text, expected result)
        let cases: &[(&str, usize, &str, &str)] = &[
            ("", 0, "foo", "foo"),
            ("a", 0, "foo", "fooa"),
            ("a", 1, "foo", "afoo"),
            ("foobar", 3, "quux", "fooquuxbar"),
            ("foobar", 3, "x", "fooxbar"),
            ("foobar", 0, "x", "xfoobar"),
            ("foobar", 6, "x", "foobarx"),
            ("foobar", 3, "quuxbazquux", "fooquuxbazquuxbar"),
        ];
        for &(initial, at, text, expected) in cases {
            assert_eq!(inserted(initial, at, text), expected.as_bytes());
        }
    }

    // Inserting past the end of the vector must panic.

    #[test]
    #[should_panic]
    fn insert_fail1() {
        inserted("", 1, "foo");
    }

    #[test]
    #[should_panic]
    fn insert_fail2() {
        inserted("a", 2, "foo");
    }

    #[test]
    #[should_panic]
    fn insert_fail3() {
        inserted("foobar", 7, "foo");
    }
}
1125