1 #![allow(unused_imports)]
2
3 use std::borrow::Cow;
4 use std::error;
5 use std::ffi::{OsStr, OsString};
6 use std::fmt;
7 use std::iter;
8 use std::ops;
9 use std::path::{Path, PathBuf};
10 use std::ptr;
11 use std::str;
12 use std::vec;
13
14 use ext_slice::ByteSlice;
15 use utf8::{self, Utf8Error};
16
/// Glue every element produced by the iterator end-to-end into one freshly
/// allocated `Vec<u8>`.
///
/// Any element type that can be viewed as a `&[u8]` is accepted. This
/// includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
/// An empty iterator yields an empty vector.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::concat(&["foo", "bar", "baz"]);
/// assert_eq!(s, "foobarbaz".as_bytes());
/// ```
#[inline]
pub fn concat<T, I>(elements: I) -> Vec<u8>
where
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    // Thread a single growing buffer through the iteration, appending the
    // bytes of each piece in the order the iterator yields them.
    elements.into_iter().fold(Vec::new(), |mut joined, piece| {
        joined.extend_from_slice(piece.as_ref());
        joined
    })
}
45
/// Build a single `Vec<u8>` out of the iterator's elements, placing the given
/// separator between each pair of neighbouring elements.
///
/// The separator and the elements may each be any type that can be viewed as
/// a `&[u8]`. This includes, but is not limited to, `&str`, `&BStr` and
/// `&[u8]` itself. No separator is written before the first element or after
/// the last one, and an empty iterator yields an empty vector.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::join(",", &["foo", "bar", "baz"]);
/// assert_eq!(s, "foo,bar,baz".as_bytes());
/// ```
#[inline]
pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
where
    B: AsRef<[u8]>,
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    let mut remaining = elements.into_iter();
    let mut joined = Vec::new();
    // The first element is written bare; every later element is preceded by
    // the separator.
    if let Some(head) = remaining.next() {
        joined.extend_from_slice(head.as_ref());
        for piece in remaining {
            joined.extend_from_slice(separator.as_ref());
            joined.extend_from_slice(piece.as_ref());
        }
    }
    joined
}
84
// The extension trait is implemented for `Vec<u8>` itself, so each of the
// three required accessors just hands back the vector unchanged.
impl ByteVec for Vec<u8> {
    // Shared access: the receiver already is the vector.
    #[inline]
    fn as_vec(&self) -> &Vec<u8> {
        self
    }

    // Exclusive access: the receiver already is the vector.
    #[inline]
    fn as_vec_mut(&mut self) -> &mut Vec<u8> {
        self
    }

    // Ownership transfer: the vector is returned by value, as-is.
    #[inline]
    fn into_vec(self) -> Vec<u8> {
        self
    }
}
101
// NOTE(review): the seal only holds if this trait cannot be named from
// outside the crate (e.g. because the enclosing module is private) — confirm
// against the crate root.
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
///
/// `ByteVec` lists `Sealed` as its supertrait, and the only implementation
/// provided here is the one for `Vec<u8>`.
pub trait Sealed {}
impl Sealed for Vec<u8> {}
106
107 /// A trait that extends `Vec<u8>` with string oriented methods.
108 ///
109 /// Note that when using the constructor methods, such as
110 /// `ByteVec::from_slice`, one should actually call them using the concrete
111 /// type. For example:
112 ///
113 /// ```
114 /// use bstr::{B, ByteVec};
115 ///
116 /// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
117 /// assert_eq!(s, B("abc"));
118 /// ```
119 pub trait ByteVec: Sealed {
/// A method for accessing the raw vector bytes of this type. This is
/// always a no-op and callers shouldn't care about it. This only exists
/// for making the extension trait work: the provided methods of this
/// trait call it whenever they need to read the underlying vector.
#[doc(hidden)]
fn as_vec(&self) -> &Vec<u8>;

/// A method for accessing the raw vector bytes of this type, mutably. This
/// is always a no-op and callers shouldn't care about it. This only exists
/// for making the extension trait work: the provided methods of this
/// trait call it whenever they need to modify the underlying vector.
#[doc(hidden)]
fn as_vec_mut(&mut self) -> &mut Vec<u8>;

/// A method for consuming ownership of this vector. This is always a no-op
/// and callers shouldn't care about it. This only exists for making the
/// extension trait work: the consuming `into_*` methods of this trait call
/// it to take the underlying vector by value.
#[doc(hidden)]
fn into_vec(self) -> Vec<u8>
where
    Self: Sized;
139
140 /// Create a new owned byte string from the given byte slice.
141 ///
142 /// # Examples
143 ///
144 /// Basic usage:
145 ///
146 /// ```
147 /// use bstr::{B, ByteVec};
148 ///
149 /// let s = Vec::from_slice(b"abc");
150 /// assert_eq!(s, B("abc"));
151 /// ```
#[inline]
fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
    // View the argument as a plain byte slice, then copy it into a new
    // vector; the caller's value is only borrowed for the copy.
    let source: &[u8] = bytes.as_ref();
    Vec::from(source)
}
156
157 /// Create a new byte string from an owned OS string.
158 ///
159 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
160 /// this returns the original OS string if it is not valid UTF-8.
161 ///
162 /// # Examples
163 ///
164 /// Basic usage:
165 ///
166 /// ```
167 /// use std::ffi::OsString;
168 ///
169 /// use bstr::{B, ByteVec};
170 ///
171 /// let os_str = OsString::from("foo");
172 /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
173 /// assert_eq!(bs, B("foo"));
174 /// ```
#[inline]
fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
    // Unix: the OS string is unwrapped directly into its bytes, so this
    // variant never fails.
    #[cfg(unix)]
    #[inline]
    fn convert(os: OsString) -> Result<Vec<u8>, OsString> {
        use std::os::unix::ffi::OsStringExt;

        Ok(os.into_vec())
    }

    // Elsewhere: only an OS string that converts to a `String` is accepted;
    // otherwise the original OS string is handed back as the error.
    #[cfg(not(unix))]
    #[inline]
    fn convert(os: OsString) -> Result<Vec<u8>, OsString> {
        os.into_string().map(String::into_bytes)
    }

    convert(os_str)
}
193
194 /// Lossily create a new byte string from an OS string slice.
195 ///
196 /// On Unix, this always succeeds, is zero cost and always returns a slice.
197 /// On non-Unix systems, this does a UTF-8 check. If the given OS string
198 /// slice is not valid UTF-8, then it is lossily decoded into valid UTF-8
199 /// (with invalid bytes replaced by the Unicode replacement codepoint).
200 ///
201 /// # Examples
202 ///
203 /// Basic usage:
204 ///
205 /// ```
206 /// use std::ffi::OsStr;
207 ///
208 /// use bstr::{B, ByteVec};
209 ///
210 /// let os_str = OsStr::new("foo");
211 /// let bs = Vec::from_os_str_lossy(os_str);
212 /// assert_eq!(bs, B("foo"));
213 /// ```
#[inline]
fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
    // Unix: the bytes of the OS string slice are borrowed as-is.
    #[cfg(unix)]
    #[inline]
    fn convert<'s>(os: &'s OsStr) -> Cow<'s, [u8]> {
        use std::os::unix::ffi::OsStrExt;

        let raw: &'s [u8] = os.as_bytes();
        Cow::Borrowed(raw)
    }

    // Elsewhere: go through the lossy string conversion and keep whichever
    // flavour (borrowed or owned) it produced, re-expressed as bytes.
    #[cfg(not(unix))]
    #[inline]
    fn convert<'s>(os: &'s OsStr) -> Cow<'s, [u8]> {
        match os.to_string_lossy() {
            Cow::Borrowed(text) => Cow::Borrowed(text.as_bytes()),
            Cow::Owned(text) => Cow::Owned(text.into_bytes()),
        }
    }

    convert(os_str)
}
235
236 /// Create a new byte string from an owned file path.
237 ///
238 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
239 /// this returns the original path if it is not valid UTF-8.
240 ///
241 /// # Examples
242 ///
243 /// Basic usage:
244 ///
245 /// ```
246 /// use std::path::PathBuf;
247 ///
248 /// use bstr::{B, ByteVec};
249 ///
250 /// let path = PathBuf::from("foo");
251 /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
252 /// assert_eq!(bs, B("foo"));
253 /// ```
254 #[inline]
from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf>255 fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
256 Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
257 }
258
259 /// Lossily create a new byte string from a file path.
260 ///
261 /// On Unix, this always succeeds, is zero cost and always returns a slice.
262 /// On non-Unix systems, this does a UTF-8 check. If the given path is not
263 /// valid UTF-8, then it is lossily decoded into valid UTF-8 (with invalid
264 /// bytes replaced by the Unicode replacement codepoint).
265 ///
266 /// # Examples
267 ///
268 /// Basic usage:
269 ///
270 /// ```
271 /// use std::path::Path;
272 ///
273 /// use bstr::{B, ByteVec};
274 ///
275 /// let path = Path::new("foo");
276 /// let bs = Vec::from_path_lossy(path);
277 /// assert_eq!(bs, B("foo"));
278 /// ```
279 #[inline]
from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]>280 fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
281 Vec::from_os_str_lossy(path.as_os_str())
282 }
283
284 /// Appends the given byte to the end of this byte string.
285 ///
286 /// Note that this is equivalent to the generic `Vec::push` method. This
287 /// method is provided to permit callers to explicitly differentiate
288 /// between pushing bytes, codepoints and strings.
289 ///
290 /// # Examples
291 ///
292 /// Basic usage:
293 ///
294 /// ```
295 /// use bstr::ByteVec;
296 ///
297 /// let mut s = <Vec<u8>>::from("abc");
298 /// s.push_byte(b'\xE2');
299 /// s.push_byte(b'\x98');
300 /// s.push_byte(b'\x83');
301 /// assert_eq!(s, "abc☃".as_bytes());
302 /// ```
303 #[inline]
push_byte(&mut self, byte: u8)304 fn push_byte(&mut self, byte: u8) {
305 self.as_vec_mut().push(byte);
306 }
307
308 /// Appends the given `char` to the end of this byte string.
309 ///
310 /// # Examples
311 ///
312 /// Basic usage:
313 ///
314 /// ```
315 /// use bstr::ByteVec;
316 ///
317 /// let mut s = <Vec<u8>>::from("abc");
318 /// s.push_char('1');
319 /// s.push_char('2');
320 /// s.push_char('3');
321 /// assert_eq!(s, "abc123".as_bytes());
322 /// ```
323 #[inline]
push_char(&mut self, ch: char)324 fn push_char(&mut self, ch: char) {
325 if ch.len_utf8() == 1 {
326 self.push_byte(ch as u8);
327 return;
328 }
329 self.as_vec_mut()
330 .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
331 }
332
/// Appends the given slice to the end of this byte string. This accepts
/// any type that can be converted to a `&[u8]`. This includes, but is not
/// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
336 ///
337 /// # Examples
338 ///
339 /// Basic usage:
340 ///
341 /// ```
342 /// use bstr::ByteVec;
343 ///
344 /// let mut s = <Vec<u8>>::from("abc");
345 /// s.push_str(b"123");
346 /// assert_eq!(s, "abc123".as_bytes());
347 /// ```
348 #[inline]
push_str<B: AsRef<[u8]>>(&mut self, bytes: B)349 fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
350 self.as_vec_mut().extend_from_slice(bytes.as_ref());
351 }
352
353 /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
354 /// valid UTF-8.
355 ///
356 /// If it is not valid UTF-8, then a
357 /// [`FromUtf8Error`](struct.FromUtf8Error.html)
358 /// is returned. (This error can be used to examine why UTF-8 validation
359 /// failed, or to regain the original byte string.)
360 ///
361 /// # Examples
362 ///
363 /// Basic usage:
364 ///
365 /// ```
366 /// use bstr::ByteVec;
367 ///
368 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
369 /// let bytes = Vec::from("hello");
370 /// let string = bytes.into_string()?;
371 ///
372 /// assert_eq!("hello", string);
373 /// # Ok(()) }; example().unwrap()
374 /// ```
375 ///
376 /// If this byte string is not valid UTF-8, then an error will be returned.
377 /// That error can then be used to inspect the location at which invalid
378 /// UTF-8 was found, or to regain the original byte string:
379 ///
380 /// ```
381 /// use bstr::{B, ByteVec};
382 ///
383 /// let bytes = Vec::from_slice(b"foo\xFFbar");
384 /// let err = bytes.into_string().unwrap_err();
385 ///
386 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
387 /// assert_eq!(err.utf8_error().error_len(), Some(1));
388 ///
389 /// // At no point in this example is an allocation performed.
390 /// let bytes = Vec::from(err.into_vec());
391 /// assert_eq!(bytes, B(b"foo\xFFbar"));
392 /// ```
393 #[inline]
into_string(self) -> Result<String, FromUtf8Error> where Self: Sized,394 fn into_string(self) -> Result<String, FromUtf8Error>
395 where
396 Self: Sized,
397 {
398 match utf8::validate(self.as_vec()) {
399 Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
400 Ok(()) => {
401 // SAFETY: This is safe because of the guarantees provided by
402 // utf8::validate.
403 unsafe { Ok(self.into_string_unchecked()) }
404 }
405 }
406 }
407
408 /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
409 /// contains invalid UTF-8, then the invalid bytes are replaced with the
410 /// Unicode replacement codepoint.
411 ///
412 /// # Examples
413 ///
414 /// Basic usage:
415 ///
416 /// ```
417 /// use bstr::ByteVec;
418 ///
419 /// let bytes = Vec::from_slice(b"foo\xFFbar");
420 /// let string = bytes.into_string_lossy();
421 /// assert_eq!(string, "foo\u{FFFD}bar");
422 /// ```
423 #[inline]
into_string_lossy(self) -> String where Self: Sized,424 fn into_string_lossy(self) -> String
425 where
426 Self: Sized,
427 {
428 match self.as_vec().to_str_lossy() {
429 Cow::Borrowed(_) => {
430 // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
431 // the entire string is valid utf8.
432 unsafe { self.into_string_unchecked() }
433 }
434 Cow::Owned(s) => s,
435 }
436 }
437
438 /// Unsafely convert this byte string into a `String`, without checking for
439 /// valid UTF-8.
440 ///
441 /// # Safety
442 ///
443 /// Callers *must* ensure that this byte string is valid UTF-8 before
444 /// calling this method. Converting a byte string into a `String` that is
445 /// not valid UTF-8 is considered undefined behavior.
446 ///
447 /// This routine is useful in performance sensitive contexts where the
448 /// UTF-8 validity of the byte string is already known and it is
449 /// undesirable to pay the cost of an additional UTF-8 validation check
450 /// that [`into_string`](#method.into_string) performs.
451 ///
452 /// # Examples
453 ///
454 /// Basic usage:
455 ///
456 /// ```
457 /// use bstr::ByteVec;
458 ///
459 /// // SAFETY: This is safe because string literals are guaranteed to be
460 /// // valid UTF-8 by the Rust compiler.
461 /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
462 /// assert_eq!("☃βツ", s);
463 /// ```
464 #[inline]
into_string_unchecked(self) -> String where Self: Sized,465 unsafe fn into_string_unchecked(self) -> String
466 where
467 Self: Sized,
468 {
469 String::from_utf8_unchecked(self.into_vec())
470 }
471
472 /// Converts this byte string into an OS string, in place.
473 ///
474 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
475 /// this returns the original byte string if it is not valid UTF-8.
476 ///
477 /// # Examples
478 ///
479 /// Basic usage:
480 ///
481 /// ```
482 /// use std::ffi::OsStr;
483 ///
484 /// use bstr::ByteVec;
485 ///
486 /// let bs = Vec::from("foo");
487 /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
488 /// assert_eq!(os_str, OsStr::new("foo"));
489 /// ```
490 #[inline]
into_os_string(self) -> Result<OsString, Vec<u8>> where Self: Sized,491 fn into_os_string(self) -> Result<OsString, Vec<u8>>
492 where
493 Self: Sized,
494 {
495 #[cfg(unix)]
496 #[inline]
497 fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
498 use std::os::unix::ffi::OsStringExt;
499
500 Ok(OsString::from_vec(v))
501 }
502
503 #[cfg(not(unix))]
504 #[inline]
505 fn imp(v: Vec<u8>) -> Result<OsString, Vec<u8>> {
506 match v.into_string() {
507 Ok(s) => Ok(OsString::from(s)),
508 Err(err) => Err(err.into_vec()),
509 }
510 }
511
512 imp(self.into_vec())
513 }
514
515 /// Lossily converts this byte string into an OS string, in place.
516 ///
517 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
518 /// this will perform a UTF-8 check and lossily convert this byte string
519 /// into valid UTF-8 using the Unicode replacement codepoint.
520 ///
521 /// Note that this can prevent the correct roundtripping of file paths on
522 /// non-Unix systems such as Windows, where file paths are an arbitrary
523 /// sequence of 16-bit integers.
524 ///
525 /// # Examples
526 ///
527 /// Basic usage:
528 ///
529 /// ```
530 /// use bstr::ByteVec;
531 ///
532 /// let bs = Vec::from_slice(b"foo\xFFbar");
533 /// let os_str = bs.into_os_string_lossy();
534 /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
535 /// ```
536 #[inline]
into_os_string_lossy(self) -> OsString where Self: Sized,537 fn into_os_string_lossy(self) -> OsString
538 where
539 Self: Sized,
540 {
541 #[cfg(unix)]
542 #[inline]
543 fn imp(v: Vec<u8>) -> OsString {
544 use std::os::unix::ffi::OsStringExt;
545
546 OsString::from_vec(v)
547 }
548
549 #[cfg(not(unix))]
550 #[inline]
551 fn imp(v: Vec<u8>) -> OsString {
552 OsString::from(v.into_string_lossy())
553 }
554
555 imp(self.into_vec())
556 }
557
558 /// Converts this byte string into an owned file path, in place.
559 ///
560 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
561 /// this returns the original byte string if it is not valid UTF-8.
562 ///
563 /// # Examples
564 ///
565 /// Basic usage:
566 ///
567 /// ```
568 /// use bstr::ByteVec;
569 ///
570 /// let bs = Vec::from("foo");
571 /// let path = bs.into_path_buf().expect("should be valid UTF-8");
572 /// assert_eq!(path.as_os_str(), "foo");
573 /// ```
574 #[inline]
into_path_buf(self) -> Result<PathBuf, Vec<u8>> where Self: Sized,575 fn into_path_buf(self) -> Result<PathBuf, Vec<u8>>
576 where
577 Self: Sized,
578 {
579 self.into_os_string().map(PathBuf::from)
580 }
581
582 /// Lossily converts this byte string into an owned file path, in place.
583 ///
584 /// On Unix, this always succeeds and is zero cost. On non-Unix systems,
585 /// this will perform a UTF-8 check and lossily convert this byte string
586 /// into valid UTF-8 using the Unicode replacement codepoint.
587 ///
588 /// Note that this can prevent the correct roundtripping of file paths on
589 /// non-Unix systems such as Windows, where file paths are an arbitrary
590 /// sequence of 16-bit integers.
591 ///
592 /// # Examples
593 ///
594 /// Basic usage:
595 ///
596 /// ```
597 /// use bstr::ByteVec;
598 ///
599 /// let bs = Vec::from_slice(b"foo\xFFbar");
600 /// let path = bs.into_path_buf_lossy();
601 /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
602 /// ```
603 #[inline]
into_path_buf_lossy(self) -> PathBuf where Self: Sized,604 fn into_path_buf_lossy(self) -> PathBuf
605 where
606 Self: Sized,
607 {
608 PathBuf::from(self.into_os_string_lossy())
609 }
610
611 /// Removes the last byte from this `Vec<u8>` and returns it.
612 ///
613 /// If this byte string is empty, then `None` is returned.
614 ///
615 /// If the last codepoint in this byte string is not ASCII, then removing
616 /// the last byte could make this byte string contain invalid UTF-8.
617 ///
618 /// Note that this is equivalent to the generic `Vec::pop` method. This
619 /// method is provided to permit callers to explicitly differentiate
620 /// between popping bytes and codepoints.
621 ///
622 /// # Examples
623 ///
624 /// Basic usage:
625 ///
626 /// ```
627 /// use bstr::ByteVec;
628 ///
629 /// let mut s = Vec::from("foo");
630 /// assert_eq!(s.pop_byte(), Some(b'o'));
631 /// assert_eq!(s.pop_byte(), Some(b'o'));
632 /// assert_eq!(s.pop_byte(), Some(b'f'));
633 /// assert_eq!(s.pop_byte(), None);
634 /// ```
635 #[inline]
pop_byte(&mut self) -> Option<u8>636 fn pop_byte(&mut self) -> Option<u8> {
637 self.as_vec_mut().pop()
638 }
639
640 /// Removes the last codepoint from this `Vec<u8>` and returns it.
641 ///
642 /// If this byte string is empty, then `None` is returned. If the last
643 /// bytes of this byte string do not correspond to a valid UTF-8 code unit
644 /// sequence, then the Unicode replacement codepoint is yielded instead in
645 /// accordance with the
646 /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
647 ///
648 /// # Examples
649 ///
650 /// Basic usage:
651 ///
652 /// ```
653 /// use bstr::ByteVec;
654 ///
655 /// let mut s = Vec::from("foo");
656 /// assert_eq!(s.pop_char(), Some('o'));
657 /// assert_eq!(s.pop_char(), Some('o'));
658 /// assert_eq!(s.pop_char(), Some('f'));
659 /// assert_eq!(s.pop_char(), None);
660 /// ```
661 ///
662 /// This shows the replacement codepoint substitution policy. Note that
663 /// the first pop yields a replacement codepoint but actually removes two
664 /// bytes. This is in contrast with subsequent pops when encountering
665 /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
666 /// code unit sequence.
667 ///
668 /// ```
669 /// use bstr::ByteVec;
670 ///
671 /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
672 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
673 /// assert_eq!(s.pop_char(), Some('o'));
674 /// assert_eq!(s.pop_char(), Some('o'));
675 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
676 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
677 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
678 /// assert_eq!(s.pop_char(), Some('f'));
679 /// assert_eq!(s.pop_char(), None);
680 /// ```
681 #[inline]
pop_char(&mut self) -> Option<char>682 fn pop_char(&mut self) -> Option<char> {
683 let (ch, size) = utf8::decode_last_lossy(self.as_vec());
684 if size == 0 {
685 return None;
686 }
687 let new_len = self.as_vec().len() - size;
688 self.as_vec_mut().truncate(new_len);
689 Some(ch)
690 }
691
692 /// Removes a `char` from this `Vec<u8>` at the given byte position and
693 /// returns it.
694 ///
695 /// If the bytes at the given position do not lead to a valid UTF-8 code
696 /// unit sequence, then a
697 /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
698 ///
699 /// # Panics
700 ///
701 /// Panics if `at` is larger than or equal to this byte string's length.
702 ///
703 /// # Examples
704 ///
705 /// Basic usage:
706 ///
707 /// ```
708 /// use bstr::ByteVec;
709 ///
710 /// let mut s = Vec::from("foo☃bar");
711 /// assert_eq!(s.remove_char(3), '☃');
712 /// assert_eq!(s, b"foobar");
713 /// ```
714 ///
715 /// This example shows how the Unicode replacement codepoint policy is
716 /// used:
717 ///
718 /// ```
719 /// use bstr::ByteVec;
720 ///
721 /// let mut s = Vec::from_slice(b"foo\xFFbar");
722 /// assert_eq!(s.remove_char(3), '\u{FFFD}');
723 /// assert_eq!(s, b"foobar");
724 /// ```
725 #[inline]
remove_char(&mut self, at: usize) -> char726 fn remove_char(&mut self, at: usize) -> char {
727 let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
728 assert!(
729 size > 0,
730 "expected {} to be less than {}",
731 at,
732 self.as_vec().len(),
733 );
734 self.as_vec_mut().drain(at..at + size);
735 ch
736 }
737
738 /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
739 /// position.
740 ///
741 /// This is an `O(n)` operation as it may copy a number of elements in this
742 /// byte string proportional to its length.
743 ///
744 /// # Panics
745 ///
746 /// Panics if `at` is larger than the byte string's length.
747 ///
748 /// # Examples
749 ///
750 /// Basic usage:
751 ///
752 /// ```
753 /// use bstr::ByteVec;
754 ///
755 /// let mut s = Vec::from("foobar");
756 /// s.insert_char(3, '☃');
757 /// assert_eq!(s, "foo☃bar".as_bytes());
758 /// ```
759 #[inline]
insert_char(&mut self, at: usize, ch: char)760 fn insert_char(&mut self, at: usize, ch: char) {
761 self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
762 }
763
764 /// Inserts the given byte string into this byte string at a particular
765 /// byte position.
766 ///
767 /// This is an `O(n)` operation as it may copy a number of elements in this
768 /// byte string proportional to its length.
769 ///
770 /// The given byte string may be any type that can be cheaply converted
771 /// into a `&[u8]`. This includes, but is not limited to, `&str` and
772 /// `&[u8]`.
773 ///
774 /// # Panics
775 ///
776 /// Panics if `at` is larger than the byte string's length.
777 ///
778 /// # Examples
779 ///
780 /// Basic usage:
781 ///
782 /// ```
783 /// use bstr::ByteVec;
784 ///
785 /// let mut s = Vec::from("foobar");
786 /// s.insert_str(3, "☃☃☃");
787 /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
788 /// ```
#[inline]
fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
    let bytes = bytes.as_ref();
    // `len` is captured once, before any mutation; every offset below is
    // computed from this original length.
    let len = self.as_vec().len();
    // Bounds check for the documented panic: `at == len` (append) is
    // allowed, anything larger is rejected before any pointer is touched.
    assert!(at <= len, "expected {} to be <= {}", at, len);

    // SAFETY: We'd like to efficiently splice in the given bytes into
    // this byte string. Since we are only working with `u8` elements here,
    // we only need to consider whether our bounds are correct and whether
    // our byte string has enough space.
    //
    // Space: the `reserve` call below requests room for `bytes.len()`
    // additional elements, so offsets up to `len + bytes.len()` are within
    // the allocation. Bounds: the assertion above established `at <= len`,
    // so `len - at` cannot underflow.
    self.as_vec_mut().reserve(bytes.len());
    unsafe {
        // Shift bytes after `at` over by the length of `bytes` to make
        // room for it. This requires referencing two regions of memory
        // that may overlap, so we use ptr::copy.
        ptr::copy(
            self.as_vec().as_ptr().add(at),
            self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
            len - at,
        );
        // Now copy the bytes given into the room we made above. In this
        // case, we know that the given bytes cannot possibly overlap
        // with this byte string since we have a mutable borrow of the
        // latter. Thus, we can use a nonoverlapping copy.
        ptr::copy_nonoverlapping(
            bytes.as_ptr(),
            self.as_vec_mut().as_mut_ptr().add(at),
            bytes.len(),
        );
        // Only after both copies have filled the new region is the length
        // updated to cover it.
        self.as_vec_mut().set_len(len + bytes.len());
    }
}
821
822 /// Removes the specified range in this byte string and replaces it with
823 /// the given bytes. The given bytes do not need to have the same length
824 /// as the range provided.
825 ///
826 /// # Panics
827 ///
828 /// Panics if the given range is invalid.
829 ///
830 /// # Examples
831 ///
832 /// Basic usage:
833 ///
834 /// ```
835 /// use bstr::ByteVec;
836 ///
837 /// let mut s = Vec::from("foobar");
838 /// s.replace_range(2..4, "xxxxx");
839 /// assert_eq!(s, "foxxxxxar".as_bytes());
840 /// ```
841 #[inline]
replace_range<R, B>(&mut self, range: R, replace_with: B) where R: ops::RangeBounds<usize>, B: AsRef<[u8]>,842 fn replace_range<R, B>(&mut self, range: R, replace_with: B)
843 where
844 R: ops::RangeBounds<usize>,
845 B: AsRef<[u8]>,
846 {
847 self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned());
848 }
849
850 /// Creates a draining iterator that removes the specified range in this
851 /// `Vec<u8>` and yields each of the removed bytes.
852 ///
853 /// Note that the elements specified by the given range are removed
854 /// regardless of whether the returned iterator is fully exhausted.
855 ///
/// Also note that it is unspecified how many bytes are removed from the
/// `Vec<u8>` if the `DrainBytes` iterator is leaked.
858 ///
859 /// # Panics
860 ///
861 /// Panics if the given range is not valid.
862 ///
863 /// # Examples
864 ///
865 /// Basic usage:
866 ///
867 /// ```
868 /// use bstr::ByteVec;
869 ///
870 /// let mut s = Vec::from("foobar");
871 /// {
872 /// let mut drainer = s.drain_bytes(2..4);
873 /// assert_eq!(drainer.next(), Some(b'o'));
874 /// assert_eq!(drainer.next(), Some(b'b'));
875 /// assert_eq!(drainer.next(), None);
876 /// }
877 /// assert_eq!(s, "foar".as_bytes());
878 /// ```
879 #[inline]
drain_bytes<R>(&mut self, range: R) -> DrainBytes where R: ops::RangeBounds<usize>,880 fn drain_bytes<R>(&mut self, range: R) -> DrainBytes
881 where
882 R: ops::RangeBounds<usize>,
883 {
884 DrainBytes { it: self.as_vec_mut().drain(range) }
885 }
886 }
887
/// A draining byte oriented iterator for `Vec<u8>`.
///
/// This iterator is created by
/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
///
/// It is a thin wrapper around `std::vec::Drain<u8>`: every iterator method
/// implemented here forwards to the wrapped drain, including `size_hint`, so
/// the exact number of remaining bytes is always reported.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteVec;
///
/// let mut s = Vec::from("foobar");
/// {
///     let mut drainer = s.drain_bytes(2..4);
///     assert_eq!(drainer.next(), Some(b'o'));
///     assert_eq!(drainer.next(), Some(b'b'));
///     assert_eq!(drainer.next(), None);
/// }
/// assert_eq!(s, "foar".as_bytes());
/// ```
#[derive(Debug)]
pub struct DrainBytes<'a> {
    // The underlying drain that actually removes the bytes.
    it: vec::Drain<'a, u8>,
}

impl<'a> iter::FusedIterator for DrainBytes<'a> {}

impl<'a> Iterator for DrainBytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    // Forward the wrapped drain's bounds. Without this, the default
    // `(0, None)` would be reported even though this type also implements
    // `ExactSizeIterator`, whose contract requires `size_hint` to return the
    // exact remaining length.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for DrainBytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl<'a> ExactSizeIterator for DrainBytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
938
/// An error that may occur when converting a `Vec<u8>` to a `String`.
///
/// This error includes the original `Vec<u8>` that failed to convert to a
/// `String`. This permits callers to recover the allocation used even if
/// it is not valid UTF-8.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteVec};
///
/// let bytes = Vec::from_slice(b"foo\xFFbar");
/// let err = bytes.into_string().unwrap_err();
///
/// assert_eq!(err.utf8_error().valid_up_to(), 3);
/// assert_eq!(err.utf8_error().error_len(), Some(1));
///
/// // At no point in this example is an allocation performed.
/// let bytes = Vec::from(err.into_vec());
/// assert_eq!(bytes, B(b"foo\xFFbar"));
/// ```
#[derive(Debug, Eq, PartialEq)]
pub struct FromUtf8Error {
    // The vector that was rejected, kept so it can be handed back.
    original: Vec<u8>,
    // The validation failure reported for `original`.
    err: Utf8Error,
}
967
968 impl FromUtf8Error {
969 /// Return the original bytes as a slice that failed to convert to a
970 /// `String`.
971 ///
972 /// # Examples
973 ///
974 /// Basic usage:
975 ///
976 /// ```
977 /// use bstr::{B, ByteVec};
978 ///
979 /// let bytes = Vec::from_slice(b"foo\xFFbar");
980 /// let err = bytes.into_string().unwrap_err();
981 ///
982 /// // At no point in this example is an allocation performed.
983 /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
984 /// ```
985 #[inline]
as_bytes(&self) -> &[u8]986 pub fn as_bytes(&self) -> &[u8] {
987 &self.original
988 }
989
990 /// Consume this error and return the original byte string that failed to
991 /// convert to a `String`.
992 ///
993 /// # Examples
994 ///
995 /// Basic usage:
996 ///
997 /// ```
998 /// use bstr::{B, ByteVec};
999 ///
1000 /// let bytes = Vec::from_slice(b"foo\xFFbar");
1001 /// let err = bytes.into_string().unwrap_err();
1002 /// let original = err.into_vec();
1003 ///
1004 /// // At no point in this example is an allocation performed.
1005 /// assert_eq!(original, B(b"foo\xFFbar"));
1006 /// ```
1007 #[inline]
into_vec(self) -> Vec<u8>1008 pub fn into_vec(self) -> Vec<u8> {
1009 self.original
1010 }
1011
1012 /// Return the underlying UTF-8 error that occurred. This error provides
1013 /// information on the nature and location of the invalid UTF-8 detected.
1014 ///
1015 /// # Examples
1016 ///
1017 /// Basic usage:
1018 ///
1019 /// ```
1020 /// use bstr::{B, ByteVec};
1021 ///
1022 /// let bytes = Vec::from_slice(b"foo\xFFbar");
1023 /// let err = bytes.into_string().unwrap_err();
1024 ///
1025 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
1026 /// assert_eq!(err.utf8_error().error_len(), Some(1));
1027 /// ```
1028 #[inline]
utf8_error(&self) -> &Utf8Error1029 pub fn utf8_error(&self) -> &Utf8Error {
1030 &self.err
1031 }
1032 }
1033
impl error::Error for FromUtf8Error {
    // Returns a fixed summary string; it does not vary with the failure.
    // The `Display` implementation for this type is the one that reports
    // the wrapped UTF-8 error.
    // NOTE(review): `Error::description` is deprecated in newer Rust
    // releases — confirm against the crate's minimum supported version
    // before keeping or removing this override.
    #[inline]
    fn description(&self) -> &str {
        "invalid UTF-8 vector"
    }
}
1040
1041 impl fmt::Display for FromUtf8Error {
1042 #[inline]
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result1043 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1044 write!(f, "{}", self.err)
1045 }
1046 }
1047
#[cfg(test)]
mod tests {
    use ext_slice::B;
    use ext_vec::ByteVec;

    // Exercises `insert_str` at the front, middle and end of byte strings of
    // various lengths, including an empty one.
    #[test]
    fn insert() {
        // (initial contents, byte offset, inserted text, expected result)
        let cases: &[(&str, usize, &str, &str)] = &[
            ("", 0, "foo", "foo"),
            ("a", 0, "foo", "fooa"),
            ("a", 1, "foo", "afoo"),
            ("foobar", 3, "quux", "fooquuxbar"),
            ("foobar", 3, "x", "fooxbar"),
            ("foobar", 0, "x", "xfoobar"),
            ("foobar", 6, "x", "foobarx"),
            ("foobar", 3, "quuxbazquux", "fooquuxbazquuxbar"),
        ];
        for &(initial, at, addition, expected) in cases {
            let mut s = Vec::from(initial);
            s.insert_str(at, addition);
            assert_eq!(s, expected.as_bytes());
        }
    }

    // Offset 1 is out of range for an empty byte string.
    #[test]
    #[should_panic]
    fn insert_fail1() {
        let mut s: Vec<u8> = Vec::new();
        s.insert_str(1, "foo");
    }

    // Offset 2 is out of range for a one-byte string.
    #[test]
    #[should_panic]
    fn insert_fail2() {
        let mut s: Vec<u8> = Vec::from("a");
        s.insert_str(2, "foo");
    }

    // Offset 7 is out of range for a six-byte string.
    #[test]
    #[should_panic]
    fn insert_fail3() {
        let mut s: Vec<u8> = Vec::from("foobar");
        s.insert_str(7, "foo");
    }
}
1109