1 //! String manipulation.
2 //!
3 //! For more details, see the [`std::str`] module.
4 //!
5 //! [`std::str`]: ../../std/str/index.html
6
7 #![stable(feature = "rust1", since = "1.0.0")]
8
9 mod converts;
10 mod count;
11 mod error;
12 mod iter;
13 mod traits;
14 mod validations;
15
16 use self::pattern::Pattern;
17 use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
18
19 use crate::ascii;
20 use crate::char::{self, EscapeDebugExtArgs};
21 use crate::mem;
22 use crate::slice::{self, SliceIndex};
23
24 pub mod pattern;
25
26 mod lossy;
27 #[unstable(feature = "utf8_chunks", issue = "99543")]
28 pub use lossy::{Utf8Chunk, Utf8Chunks};
29
30 #[stable(feature = "rust1", since = "1.0.0")]
31 pub use converts::{from_utf8, from_utf8_unchecked};
32
33 #[stable(feature = "str_mut_extras", since = "1.20.0")]
34 pub use converts::{from_utf8_mut, from_utf8_unchecked_mut};
35
36 #[stable(feature = "rust1", since = "1.0.0")]
37 pub use error::{ParseBoolError, Utf8Error};
38
39 #[stable(feature = "rust1", since = "1.0.0")]
40 pub use traits::FromStr;
41
42 #[stable(feature = "rust1", since = "1.0.0")]
43 pub use iter::{Bytes, CharIndices, Chars, Lines, SplitWhitespace};
44
45 #[stable(feature = "rust1", since = "1.0.0")]
46 #[allow(deprecated)]
47 pub use iter::LinesAny;
48
49 #[stable(feature = "rust1", since = "1.0.0")]
50 pub use iter::{RSplit, RSplitTerminator, Split, SplitTerminator};
51
52 #[stable(feature = "rust1", since = "1.0.0")]
53 pub use iter::{RSplitN, SplitN};
54
55 #[stable(feature = "str_matches", since = "1.2.0")]
56 pub use iter::{Matches, RMatches};
57
58 #[stable(feature = "str_match_indices", since = "1.5.0")]
59 pub use iter::{MatchIndices, RMatchIndices};
60
61 #[stable(feature = "encode_utf16", since = "1.8.0")]
62 pub use iter::EncodeUtf16;
63
64 #[stable(feature = "str_escape", since = "1.34.0")]
65 pub use iter::{EscapeDebug, EscapeDefault, EscapeUnicode};
66
67 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
68 pub use iter::SplitAsciiWhitespace;
69
70 #[stable(feature = "split_inclusive", since = "1.51.0")]
71 pub use iter::SplitInclusive;
72
73 #[unstable(feature = "str_internals", issue = "none")]
74 pub use validations::{next_code_point, utf8_char_width};
75
76 use iter::MatchIndicesInternal;
77 use iter::SplitInternal;
78 use iter::{MatchesInternal, SplitNInternal};
79
80 #[inline(never)]
81 #[cold]
82 #[track_caller]
83 #[rustc_allow_const_fn_unstable(const_eval_select)]
slice_error_fail(s: &str, begin: usize, end: usize) -> !84 const fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
85 // SAFETY: panics for both branches
86 unsafe {
87 crate::intrinsics::const_eval_select(
88 (s, begin, end),
89 slice_error_fail_ct,
90 slice_error_fail_rt,
91 )
92 }
93 }
94
#[track_caller]
const fn slice_error_fail_ct(_s: &str, _begin: usize, _end: usize) -> ! {
    // Compile-time flavour of the slicing failure: the arguments are ignored
    // and a fixed message is used.
    panic!("failed to slice string")
}
99
100 #[track_caller]
slice_error_fail_rt(s: &str, begin: usize, end: usize) -> !101 fn slice_error_fail_rt(s: &str, begin: usize, end: usize) -> ! {
102 const MAX_DISPLAY_LENGTH: usize = 256;
103 let trunc_len = s.floor_char_boundary(MAX_DISPLAY_LENGTH);
104 let s_trunc = &s[..trunc_len];
105 let ellipsis = if trunc_len < s.len() { "[...]" } else { "" };
106
107 // 1. out of bounds
108 if begin > s.len() || end > s.len() {
109 let oob_index = if begin > s.len() { begin } else { end };
110 panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}");
111 }
112
113 // 2. begin <= end
114 assert!(
115 begin <= end,
116 "begin <= end ({} <= {}) when slicing `{}`{}",
117 begin,
118 end,
119 s_trunc,
120 ellipsis
121 );
122
123 // 3. character boundary
124 let index = if !s.is_char_boundary(begin) { begin } else { end };
125 // find the character
126 let char_start = s.floor_char_boundary(index);
127 // `char_start` must be less than len and a char boundary
128 let ch = s[char_start..].chars().next().unwrap();
129 let char_range = char_start..char_start + ch.len_utf8();
130 panic!(
131 "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
132 index, ch, char_range, s_trunc, ellipsis
133 );
134 }
135
136 #[cfg(not(test))]
137 impl str {
138 /// Returns the length of `self`.
139 ///
140 /// This length is in bytes, not [`char`]s or graphemes. In other words,
141 /// it might not be what a human considers the length of the string.
142 ///
143 /// [`char`]: prim@char
144 ///
145 /// # Examples
146 ///
147 /// ```
148 /// let len = "foo".len();
149 /// assert_eq!(3, len);
150 ///
151 /// assert_eq!("ƒoo".len(), 4); // fancy f!
152 /// assert_eq!("ƒoo".chars().count(), 3);
153 /// ```
154 #[stable(feature = "rust1", since = "1.0.0")]
155 #[rustc_const_stable(feature = "const_str_len", since = "1.39.0")]
156 #[must_use]
157 #[inline]
len(&self) -> usize158 pub const fn len(&self) -> usize {
159 self.as_bytes().len()
160 }
161
162 /// Returns `true` if `self` has a length of zero bytes.
163 ///
164 /// # Examples
165 ///
166 /// ```
167 /// let s = "";
168 /// assert!(s.is_empty());
169 ///
170 /// let s = "not empty";
171 /// assert!(!s.is_empty());
172 /// ```
173 #[stable(feature = "rust1", since = "1.0.0")]
174 #[rustc_const_stable(feature = "const_str_is_empty", since = "1.39.0")]
175 #[must_use]
176 #[inline]
is_empty(&self) -> bool177 pub const fn is_empty(&self) -> bool {
178 self.len() == 0
179 }
180
181 /// Checks that `index`-th byte is the first byte in a UTF-8 code point
182 /// sequence or the end of the string.
183 ///
184 /// The start and end of the string (when `index == self.len()`) are
185 /// considered to be boundaries.
186 ///
187 /// Returns `false` if `index` is greater than `self.len()`.
188 ///
189 /// # Examples
190 ///
191 /// ```
192 /// let s = "Löwe 老虎 Léopard";
193 /// assert!(s.is_char_boundary(0));
194 /// // start of `老`
195 /// assert!(s.is_char_boundary(6));
196 /// assert!(s.is_char_boundary(s.len()));
197 ///
198 /// // second byte of `ö`
199 /// assert!(!s.is_char_boundary(2));
200 ///
201 /// // third byte of `老`
202 /// assert!(!s.is_char_boundary(8));
203 /// ```
204 #[must_use]
205 #[stable(feature = "is_char_boundary", since = "1.9.0")]
206 #[inline]
is_char_boundary(&self, index: usize) -> bool207 pub fn is_char_boundary(&self, index: usize) -> bool {
208 // 0 is always ok.
209 // Test for 0 explicitly so that it can optimize out the check
210 // easily and skip reading string data for that case.
211 // Note that optimizing `self.get(..index)` relies on this.
212 if index == 0 {
213 return true;
214 }
215
216 match self.as_bytes().get(index) {
217 // For `None` we have two options:
218 //
219 // - index == self.len()
220 // Empty strings are valid, so return true
221 // - index > self.len()
222 // In this case return false
223 //
224 // The check is placed exactly here, because it improves generated
225 // code on higher opt-levels. See PR #84751 for more details.
226 None => index == self.len(),
227
228 Some(&b) => b.is_utf8_char_boundary(),
229 }
230 }
231
232 /// Finds the closest `x` not exceeding `index` where `is_char_boundary(x)` is `true`.
233 ///
234 /// This method can help you truncate a string so that it's still valid UTF-8, but doesn't
235 /// exceed a given number of bytes. Note that this is done purely at the character level
236 /// and can still visually split graphemes, even though the underlying characters aren't
/// split. For example, the emoji 🧑‍🔬 (scientist) could be split so that the string only
/// includes 🧑 (person) instead.
239 ///
240 /// # Examples
241 ///
242 /// ```
243 /// #![feature(round_char_boundary)]
/// let s = "❤️🧡💛💚💙💜";
/// assert_eq!(s.len(), 26);
/// assert!(!s.is_char_boundary(13));
///
/// let closest = s.floor_char_boundary(13);
/// assert_eq!(closest, 10);
/// assert_eq!(&s[..closest], "❤️🧡");
251 /// ```
252 #[unstable(feature = "round_char_boundary", issue = "93743")]
253 #[inline]
floor_char_boundary(&self, index: usize) -> usize254 pub fn floor_char_boundary(&self, index: usize) -> usize {
255 if index >= self.len() {
256 self.len()
257 } else {
258 let lower_bound = index.saturating_sub(3);
259 let new_index = self.as_bytes()[lower_bound..=index]
260 .iter()
261 .rposition(|b| b.is_utf8_char_boundary());
262
263 // SAFETY: we know that the character boundary will be within four bytes
264 unsafe { lower_bound + new_index.unwrap_unchecked() }
265 }
266 }
267
268 /// Finds the closest `x` not below `index` where `is_char_boundary(x)` is `true`.
269 ///
270 /// This method is the natural complement to [`floor_char_boundary`]. See that method
271 /// for more details.
272 ///
273 /// [`floor_char_boundary`]: str::floor_char_boundary
274 ///
275 /// # Panics
276 ///
277 /// Panics if `index > self.len()`.
278 ///
279 /// # Examples
280 ///
281 /// ```
282 /// #![feature(round_char_boundary)]
/// let s = "❤️🧡💛💚💙💜";
/// assert_eq!(s.len(), 26);
/// assert!(!s.is_char_boundary(13));
///
/// let closest = s.ceil_char_boundary(13);
/// assert_eq!(closest, 14);
/// assert_eq!(&s[..closest], "❤️🧡💛");
290 /// ```
291 #[unstable(feature = "round_char_boundary", issue = "93743")]
292 #[inline]
ceil_char_boundary(&self, index: usize) -> usize293 pub fn ceil_char_boundary(&self, index: usize) -> usize {
294 if index > self.len() {
295 slice_error_fail(self, index, index)
296 } else {
297 let upper_bound = Ord::min(index + 4, self.len());
298 self.as_bytes()[index..upper_bound]
299 .iter()
300 .position(|b| b.is_utf8_char_boundary())
301 .map_or(upper_bound, |pos| pos + index)
302 }
303 }
304
305 /// Converts a string slice to a byte slice. To convert the byte slice back
306 /// into a string slice, use the [`from_utf8`] function.
307 ///
308 /// # Examples
309 ///
310 /// ```
311 /// let bytes = "bors".as_bytes();
312 /// assert_eq!(b"bors", bytes);
313 /// ```
314 #[stable(feature = "rust1", since = "1.0.0")]
315 #[rustc_const_stable(feature = "str_as_bytes", since = "1.39.0")]
316 #[must_use]
317 #[inline(always)]
318 #[allow(unused_attributes)]
as_bytes(&self) -> &[u8]319 pub const fn as_bytes(&self) -> &[u8] {
320 // SAFETY: const sound because we transmute two types with the same layout
321 unsafe { mem::transmute(self) }
322 }
323
324 /// Converts a mutable string slice to a mutable byte slice.
325 ///
326 /// # Safety
327 ///
328 /// The caller must ensure that the content of the slice is valid UTF-8
329 /// before the borrow ends and the underlying `str` is used.
330 ///
331 /// Use of a `str` whose contents are not valid UTF-8 is undefined behavior.
332 ///
333 /// # Examples
334 ///
335 /// Basic usage:
336 ///
337 /// ```
338 /// let mut s = String::from("Hello");
339 /// let bytes = unsafe { s.as_bytes_mut() };
340 ///
341 /// assert_eq!(b"Hello", bytes);
342 /// ```
343 ///
344 /// Mutability:
345 ///
346 /// ```
/// let mut s = String::from("🗻∈🌏");
///
/// unsafe {
///     let bytes = s.as_bytes_mut();
///
///     bytes[0] = 0xF0;
///     bytes[1] = 0x9F;
///     bytes[2] = 0x8D;
///     bytes[3] = 0x94;
/// }
///
/// assert_eq!("🍔∈🌏", s);
359 /// ```
360 #[stable(feature = "str_mut_extras", since = "1.20.0")]
361 #[must_use]
362 #[inline(always)]
as_bytes_mut(&mut self) -> &mut [u8]363 pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
364 // SAFETY: the cast from `&str` to `&[u8]` is safe since `str`
365 // has the same layout as `&[u8]` (only std can make this guarantee).
366 // The pointer dereference is safe since it comes from a mutable reference which
367 // is guaranteed to be valid for writes.
368 unsafe { &mut *(self as *mut str as *mut [u8]) }
369 }
370
371 /// Converts a string slice to a raw pointer.
372 ///
373 /// As string slices are a slice of bytes, the raw pointer points to a
374 /// [`u8`]. This pointer will be pointing to the first byte of the string
375 /// slice.
376 ///
377 /// The caller must ensure that the returned pointer is never written to.
378 /// If you need to mutate the contents of the string slice, use [`as_mut_ptr`].
379 ///
380 /// [`as_mut_ptr`]: str::as_mut_ptr
381 ///
382 /// # Examples
383 ///
384 /// ```
385 /// let s = "Hello";
386 /// let ptr = s.as_ptr();
387 /// ```
388 #[stable(feature = "rust1", since = "1.0.0")]
389 #[rustc_const_stable(feature = "rustc_str_as_ptr", since = "1.32.0")]
390 #[must_use]
391 #[inline(always)]
as_ptr(&self) -> *const u8392 pub const fn as_ptr(&self) -> *const u8 {
393 self as *const str as *const u8
394 }
395
396 /// Converts a mutable string slice to a raw pointer.
397 ///
398 /// As string slices are a slice of bytes, the raw pointer points to a
399 /// [`u8`]. This pointer will be pointing to the first byte of the string
400 /// slice.
401 ///
402 /// It is your responsibility to make sure that the string slice only gets
403 /// modified in a way that it remains valid UTF-8.
404 #[stable(feature = "str_as_mut_ptr", since = "1.36.0")]
405 #[must_use]
406 #[inline(always)]
as_mut_ptr(&mut self) -> *mut u8407 pub fn as_mut_ptr(&mut self) -> *mut u8 {
408 self as *mut str as *mut u8
409 }
410
411 /// Returns a subslice of `str`.
412 ///
413 /// This is the non-panicking alternative to indexing the `str`. Returns
414 /// [`None`] whenever equivalent indexing operation would panic.
415 ///
416 /// # Examples
417 ///
418 /// ```
/// let v = String::from("🗻∈🌏");
///
/// assert_eq!(Some("🗻"), v.get(0..4));
422 ///
423 /// // indices not on UTF-8 sequence boundaries
424 /// assert!(v.get(1..).is_none());
425 /// assert!(v.get(..8).is_none());
426 ///
427 /// // out of bounds
428 /// assert!(v.get(..42).is_none());
429 /// ```
430 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
431 #[inline]
get<I: SliceIndex<str>>(&self, i: I) -> Option<&I::Output>432 pub fn get<I: SliceIndex<str>>(&self, i: I) -> Option<&I::Output> {
433 i.get(self)
434 }
435
436 /// Returns a mutable subslice of `str`.
437 ///
438 /// This is the non-panicking alternative to indexing the `str`. Returns
439 /// [`None`] whenever equivalent indexing operation would panic.
440 ///
441 /// # Examples
442 ///
443 /// ```
444 /// let mut v = String::from("hello");
445 /// // correct length
446 /// assert!(v.get_mut(0..5).is_some());
447 /// // out of bounds
448 /// assert!(v.get_mut(..42).is_none());
449 /// assert_eq!(Some("he"), v.get_mut(0..2).map(|v| &*v));
450 ///
451 /// assert_eq!("hello", v);
452 /// {
453 /// let s = v.get_mut(0..2);
454 /// let s = s.map(|s| {
455 /// s.make_ascii_uppercase();
456 /// &*s
457 /// });
458 /// assert_eq!(Some("HE"), s);
459 /// }
460 /// assert_eq!("HEllo", v);
461 /// ```
462 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
463 #[inline]
get_mut<I: SliceIndex<str>>(&mut self, i: I) -> Option<&mut I::Output>464 pub fn get_mut<I: SliceIndex<str>>(&mut self, i: I) -> Option<&mut I::Output> {
465 i.get_mut(self)
466 }
467
468 /// Returns an unchecked subslice of `str`.
469 ///
470 /// This is the unchecked alternative to indexing the `str`.
471 ///
472 /// # Safety
473 ///
474 /// Callers of this function are responsible that these preconditions are
475 /// satisfied:
476 ///
477 /// * The starting index must not exceed the ending index;
478 /// * Indexes must be within bounds of the original slice;
479 /// * Indexes must lie on UTF-8 sequence boundaries.
480 ///
481 /// Failing that, the returned string slice may reference invalid memory or
482 /// violate the invariants communicated by the `str` type.
483 ///
484 /// # Examples
485 ///
486 /// ```
/// let v = "🗻∈🌏";
/// unsafe {
///     assert_eq!("🗻", v.get_unchecked(0..4));
///     assert_eq!("∈", v.get_unchecked(4..7));
///     assert_eq!("🌏", v.get_unchecked(7..11));
492 /// }
493 /// ```
494 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
495 #[inline]
get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output496 pub unsafe fn get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output {
497 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
498 // the slice is dereferenceable because `self` is a safe reference.
499 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
500 unsafe { &*i.get_unchecked(self) }
501 }
502
503 /// Returns a mutable, unchecked subslice of `str`.
504 ///
505 /// This is the unchecked alternative to indexing the `str`.
506 ///
507 /// # Safety
508 ///
509 /// Callers of this function are responsible that these preconditions are
510 /// satisfied:
511 ///
512 /// * The starting index must not exceed the ending index;
513 /// * Indexes must be within bounds of the original slice;
514 /// * Indexes must lie on UTF-8 sequence boundaries.
515 ///
516 /// Failing that, the returned string slice may reference invalid memory or
517 /// violate the invariants communicated by the `str` type.
518 ///
519 /// # Examples
520 ///
521 /// ```
/// let mut v = String::from("🗻∈🌏");
/// unsafe {
///     assert_eq!("🗻", v.get_unchecked_mut(0..4));
///     assert_eq!("∈", v.get_unchecked_mut(4..7));
///     assert_eq!("🌏", v.get_unchecked_mut(7..11));
527 /// }
528 /// ```
529 #[stable(feature = "str_checked_slicing", since = "1.20.0")]
530 #[inline]
get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output531 pub unsafe fn get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output {
532 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
533 // the slice is dereferenceable because `self` is a safe reference.
534 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
535 unsafe { &mut *i.get_unchecked_mut(self) }
536 }
537
538 /// Creates a string slice from another string slice, bypassing safety
539 /// checks.
540 ///
541 /// This is generally not recommended, use with caution! For a safe
542 /// alternative see [`str`] and [`Index`].
543 ///
544 /// [`Index`]: crate::ops::Index
545 ///
546 /// This new slice goes from `begin` to `end`, including `begin` but
547 /// excluding `end`.
548 ///
549 /// To get a mutable string slice instead, see the
550 /// [`slice_mut_unchecked`] method.
551 ///
552 /// [`slice_mut_unchecked`]: str::slice_mut_unchecked
553 ///
554 /// # Safety
555 ///
556 /// Callers of this function are responsible that three preconditions are
557 /// satisfied:
558 ///
559 /// * `begin` must not exceed `end`.
560 /// * `begin` and `end` must be byte positions within the string slice.
561 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
562 ///
563 /// # Examples
564 ///
565 /// ```
566 /// let s = "Löwe 老虎 Léopard";
567 ///
568 /// unsafe {
569 /// assert_eq!("Löwe 老虎 Léopard", s.slice_unchecked(0, 21));
570 /// }
571 ///
572 /// let s = "Hello, world!";
573 ///
574 /// unsafe {
575 /// assert_eq!("world", s.slice_unchecked(7, 12));
576 /// }
577 /// ```
578 #[stable(feature = "rust1", since = "1.0.0")]
579 #[deprecated(since = "1.29.0", note = "use `get_unchecked(begin..end)` instead")]
580 #[must_use]
581 #[inline]
slice_unchecked(&self, begin: usize, end: usize) -> &str582 pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
583 // SAFETY: the caller must uphold the safety contract for `get_unchecked`;
584 // the slice is dereferenceable because `self` is a safe reference.
585 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
586 unsafe { &*(begin..end).get_unchecked(self) }
587 }
588
589 /// Creates a string slice from another string slice, bypassing safety
/// checks.
///
/// This is generally not recommended, use with caution! For a safe
592 /// alternative see [`str`] and [`IndexMut`].
593 ///
594 /// [`IndexMut`]: crate::ops::IndexMut
595 ///
596 /// This new slice goes from `begin` to `end`, including `begin` but
597 /// excluding `end`.
598 ///
599 /// To get an immutable string slice instead, see the
600 /// [`slice_unchecked`] method.
601 ///
602 /// [`slice_unchecked`]: str::slice_unchecked
603 ///
604 /// # Safety
605 ///
606 /// Callers of this function are responsible that three preconditions are
607 /// satisfied:
608 ///
609 /// * `begin` must not exceed `end`.
610 /// * `begin` and `end` must be byte positions within the string slice.
611 /// * `begin` and `end` must lie on UTF-8 sequence boundaries.
612 #[stable(feature = "str_slice_mut", since = "1.5.0")]
613 #[deprecated(since = "1.29.0", note = "use `get_unchecked_mut(begin..end)` instead")]
614 #[inline]
slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str615 pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
616 // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`;
617 // the slice is dereferenceable because `self` is a safe reference.
618 // The returned pointer is safe because impls of `SliceIndex` have to guarantee that it is.
619 unsafe { &mut *(begin..end).get_unchecked_mut(self) }
620 }
621
622 /// Divide one string slice into two at an index.
623 ///
624 /// The argument, `mid`, should be a byte offset from the start of the
625 /// string. It must also be on the boundary of a UTF-8 code point.
626 ///
627 /// The two slices returned go from the start of the string slice to `mid`,
628 /// and from `mid` to the end of the string slice.
629 ///
630 /// To get mutable string slices instead, see the [`split_at_mut`]
631 /// method.
632 ///
633 /// [`split_at_mut`]: str::split_at_mut
634 ///
635 /// # Panics
636 ///
637 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
638 /// past the end of the last code point of the string slice.
639 ///
640 /// # Examples
641 ///
642 /// ```
643 /// let s = "Per Martin-Löf";
644 ///
645 /// let (first, last) = s.split_at(3);
646 ///
647 /// assert_eq!("Per", first);
648 /// assert_eq!(" Martin-Löf", last);
649 /// ```
650 #[inline]
651 #[must_use]
652 #[stable(feature = "str_split_at", since = "1.4.0")]
split_at(&self, mid: usize) -> (&str, &str)653 pub fn split_at(&self, mid: usize) -> (&str, &str) {
654 // is_char_boundary checks that the index is in [0, .len()]
655 if self.is_char_boundary(mid) {
656 // SAFETY: just checked that `mid` is on a char boundary.
657 unsafe { (self.get_unchecked(0..mid), self.get_unchecked(mid..self.len())) }
658 } else {
659 slice_error_fail(self, 0, mid)
660 }
661 }
662
663 /// Divide one mutable string slice into two at an index.
664 ///
665 /// The argument, `mid`, should be a byte offset from the start of the
666 /// string. It must also be on the boundary of a UTF-8 code point.
667 ///
668 /// The two slices returned go from the start of the string slice to `mid`,
669 /// and from `mid` to the end of the string slice.
670 ///
671 /// To get immutable string slices instead, see the [`split_at`] method.
672 ///
673 /// [`split_at`]: str::split_at
674 ///
675 /// # Panics
676 ///
677 /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is
678 /// past the end of the last code point of the string slice.
679 ///
680 /// # Examples
681 ///
682 /// ```
683 /// let mut s = "Per Martin-Löf".to_string();
684 /// {
685 /// let (first, last) = s.split_at_mut(3);
686 /// first.make_ascii_uppercase();
687 /// assert_eq!("PER", first);
688 /// assert_eq!(" Martin-Löf", last);
689 /// }
690 /// assert_eq!("PER Martin-Löf", s);
691 /// ```
692 #[inline]
693 #[must_use]
694 #[stable(feature = "str_split_at", since = "1.4.0")]
split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str)695 pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
696 // is_char_boundary checks that the index is in [0, .len()]
697 if self.is_char_boundary(mid) {
698 let len = self.len();
699 let ptr = self.as_mut_ptr();
700 // SAFETY: just checked that `mid` is on a char boundary.
701 unsafe {
702 (
703 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)),
704 from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr.add(mid), len - mid)),
705 )
706 }
707 } else {
708 slice_error_fail(self, 0, mid)
709 }
710 }
711
712 /// Returns an iterator over the [`char`]s of a string slice.
713 ///
714 /// As a string slice consists of valid UTF-8, we can iterate through a
715 /// string slice by [`char`]. This method returns such an iterator.
716 ///
717 /// It's important to remember that [`char`] represents a Unicode Scalar
718 /// Value, and might not match your idea of what a 'character' is. Iteration
719 /// over grapheme clusters may be what you actually want. This functionality
720 /// is not provided by Rust's standard library, check crates.io instead.
721 ///
722 /// # Examples
723 ///
724 /// Basic usage:
725 ///
726 /// ```
727 /// let word = "goodbye";
728 ///
729 /// let count = word.chars().count();
730 /// assert_eq!(7, count);
731 ///
732 /// let mut chars = word.chars();
733 ///
734 /// assert_eq!(Some('g'), chars.next());
735 /// assert_eq!(Some('o'), chars.next());
736 /// assert_eq!(Some('o'), chars.next());
737 /// assert_eq!(Some('d'), chars.next());
738 /// assert_eq!(Some('b'), chars.next());
739 /// assert_eq!(Some('y'), chars.next());
740 /// assert_eq!(Some('e'), chars.next());
741 ///
742 /// assert_eq!(None, chars.next());
743 /// ```
744 ///
745 /// Remember, [`char`]s might not match your intuition about characters:
746 ///
747 /// [`char`]: prim@char
748 ///
749 /// ```
750 /// let y = "y̆";
751 ///
752 /// let mut chars = y.chars();
753 ///
754 /// assert_eq!(Some('y'), chars.next()); // not 'y̆'
755 /// assert_eq!(Some('\u{0306}'), chars.next());
756 ///
757 /// assert_eq!(None, chars.next());
758 /// ```
759 #[stable(feature = "rust1", since = "1.0.0")]
760 #[inline]
chars(&self) -> Chars<'_>761 pub fn chars(&self) -> Chars<'_> {
762 Chars { iter: self.as_bytes().iter() }
763 }
764
765 /// Returns an iterator over the [`char`]s of a string slice, and their
766 /// positions.
767 ///
768 /// As a string slice consists of valid UTF-8, we can iterate through a
769 /// string slice by [`char`]. This method returns an iterator of both
770 /// these [`char`]s, as well as their byte positions.
771 ///
772 /// The iterator yields tuples. The position is first, the [`char`] is
773 /// second.
774 ///
775 /// # Examples
776 ///
777 /// Basic usage:
778 ///
779 /// ```
780 /// let word = "goodbye";
781 ///
782 /// let count = word.char_indices().count();
783 /// assert_eq!(7, count);
784 ///
785 /// let mut char_indices = word.char_indices();
786 ///
787 /// assert_eq!(Some((0, 'g')), char_indices.next());
788 /// assert_eq!(Some((1, 'o')), char_indices.next());
789 /// assert_eq!(Some((2, 'o')), char_indices.next());
790 /// assert_eq!(Some((3, 'd')), char_indices.next());
791 /// assert_eq!(Some((4, 'b')), char_indices.next());
792 /// assert_eq!(Some((5, 'y')), char_indices.next());
793 /// assert_eq!(Some((6, 'e')), char_indices.next());
794 ///
795 /// assert_eq!(None, char_indices.next());
796 /// ```
797 ///
798 /// Remember, [`char`]s might not match your intuition about characters:
799 ///
800 /// [`char`]: prim@char
801 ///
802 /// ```
803 /// let yes = "y̆es";
804 ///
805 /// let mut char_indices = yes.char_indices();
806 ///
807 /// assert_eq!(Some((0, 'y')), char_indices.next()); // not (0, 'y̆')
808 /// assert_eq!(Some((1, '\u{0306}')), char_indices.next());
809 ///
810 /// // note the 3 here - the last character took up two bytes
811 /// assert_eq!(Some((3, 'e')), char_indices.next());
812 /// assert_eq!(Some((4, 's')), char_indices.next());
813 ///
814 /// assert_eq!(None, char_indices.next());
815 /// ```
816 #[stable(feature = "rust1", since = "1.0.0")]
817 #[inline]
char_indices(&self) -> CharIndices<'_>818 pub fn char_indices(&self) -> CharIndices<'_> {
819 CharIndices { front_offset: 0, iter: self.chars() }
820 }
821
822 /// An iterator over the bytes of a string slice.
823 ///
824 /// As a string slice consists of a sequence of bytes, we can iterate
825 /// through a string slice by byte. This method returns such an iterator.
826 ///
827 /// # Examples
828 ///
829 /// ```
830 /// let mut bytes = "bors".bytes();
831 ///
832 /// assert_eq!(Some(b'b'), bytes.next());
833 /// assert_eq!(Some(b'o'), bytes.next());
834 /// assert_eq!(Some(b'r'), bytes.next());
835 /// assert_eq!(Some(b's'), bytes.next());
836 ///
837 /// assert_eq!(None, bytes.next());
838 /// ```
839 #[stable(feature = "rust1", since = "1.0.0")]
840 #[inline]
bytes(&self) -> Bytes<'_>841 pub fn bytes(&self) -> Bytes<'_> {
842 Bytes(self.as_bytes().iter().copied())
843 }
844
845 /// Splits a string slice by whitespace.
846 ///
847 /// The iterator returned will return string slices that are sub-slices of
848 /// the original string slice, separated by any amount of whitespace.
849 ///
850 /// 'Whitespace' is defined according to the terms of the Unicode Derived
851 /// Core Property `White_Space`. If you only want to split on ASCII whitespace
852 /// instead, use [`split_ascii_whitespace`].
853 ///
854 /// [`split_ascii_whitespace`]: str::split_ascii_whitespace
855 ///
856 /// # Examples
857 ///
858 /// Basic usage:
859 ///
860 /// ```
861 /// let mut iter = "A few words".split_whitespace();
862 ///
863 /// assert_eq!(Some("A"), iter.next());
864 /// assert_eq!(Some("few"), iter.next());
865 /// assert_eq!(Some("words"), iter.next());
866 ///
867 /// assert_eq!(None, iter.next());
868 /// ```
869 ///
870 /// All kinds of whitespace are considered:
871 ///
872 /// ```
873 /// let mut iter = " Mary had\ta\u{2009}little \n\t lamb".split_whitespace();
874 /// assert_eq!(Some("Mary"), iter.next());
875 /// assert_eq!(Some("had"), iter.next());
876 /// assert_eq!(Some("a"), iter.next());
877 /// assert_eq!(Some("little"), iter.next());
878 /// assert_eq!(Some("lamb"), iter.next());
879 ///
880 /// assert_eq!(None, iter.next());
881 /// ```
882 ///
883 /// If the string is empty or all whitespace, the iterator yields no string slices:
884 /// ```
885 /// assert_eq!("".split_whitespace().next(), None);
886 /// assert_eq!(" ".split_whitespace().next(), None);
887 /// ```
888 #[must_use = "this returns the split string as an iterator, \
889 without modifying the original"]
890 #[stable(feature = "split_whitespace", since = "1.1.0")]
891 #[cfg_attr(not(test), rustc_diagnostic_item = "str_split_whitespace")]
892 #[inline]
split_whitespace(&self) -> SplitWhitespace<'_>893 pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
894 SplitWhitespace { inner: self.split(IsWhitespace).filter(IsNotEmpty) }
895 }
896
897 /// Splits a string slice by ASCII whitespace.
898 ///
899 /// The iterator returned will return string slices that are sub-slices of
900 /// the original string slice, separated by any amount of ASCII whitespace.
901 ///
902 /// To split by Unicode `Whitespace` instead, use [`split_whitespace`].
903 ///
904 /// [`split_whitespace`]: str::split_whitespace
905 ///
906 /// # Examples
907 ///
908 /// Basic usage:
909 ///
910 /// ```
911 /// let mut iter = "A few words".split_ascii_whitespace();
912 ///
913 /// assert_eq!(Some("A"), iter.next());
914 /// assert_eq!(Some("few"), iter.next());
915 /// assert_eq!(Some("words"), iter.next());
916 ///
917 /// assert_eq!(None, iter.next());
918 /// ```
919 ///
920 /// All kinds of ASCII whitespace are considered:
921 ///
922 /// ```
923 /// let mut iter = " Mary had\ta little \n\t lamb".split_ascii_whitespace();
924 /// assert_eq!(Some("Mary"), iter.next());
925 /// assert_eq!(Some("had"), iter.next());
926 /// assert_eq!(Some("a"), iter.next());
927 /// assert_eq!(Some("little"), iter.next());
928 /// assert_eq!(Some("lamb"), iter.next());
929 ///
930 /// assert_eq!(None, iter.next());
931 /// ```
932 ///
933 /// If the string is empty or all ASCII whitespace, the iterator yields no string slices:
934 /// ```
935 /// assert_eq!("".split_ascii_whitespace().next(), None);
936 /// assert_eq!(" ".split_ascii_whitespace().next(), None);
937 /// ```
938 #[must_use = "this returns the split string as an iterator, \
939 without modifying the original"]
940 #[stable(feature = "split_ascii_whitespace", since = "1.34.0")]
941 #[inline]
split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_>942 pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
943 let inner =
944 self.as_bytes().split(IsAsciiWhitespace).filter(BytesIsNotEmpty).map(UnsafeBytesToStr);
945 SplitAsciiWhitespace { inner }
946 }
947
948 /// An iterator over the lines of a string, as string slices.
949 ///
950 /// Lines are split at line endings that are either newlines (`\n`) or
951 /// sequences of a carriage return followed by a line feed (`\r\n`).
952 ///
953 /// Line terminators are not included in the lines returned by the iterator.
954 ///
955 /// The final line ending is optional. A string that ends with a final line
956 /// ending will return the same lines as an otherwise identical string
957 /// without a final line ending.
958 ///
959 /// # Examples
960 ///
961 /// Basic usage:
962 ///
963 /// ```
964 /// let text = "foo\r\nbar\n\nbaz\n";
965 /// let mut lines = text.lines();
966 ///
967 /// assert_eq!(Some("foo"), lines.next());
968 /// assert_eq!(Some("bar"), lines.next());
969 /// assert_eq!(Some(""), lines.next());
970 /// assert_eq!(Some("baz"), lines.next());
971 ///
972 /// assert_eq!(None, lines.next());
973 /// ```
974 ///
975 /// The final line ending isn't required:
976 ///
977 /// ```
978 /// let text = "foo\nbar\n\r\nbaz";
979 /// let mut lines = text.lines();
980 ///
981 /// assert_eq!(Some("foo"), lines.next());
982 /// assert_eq!(Some("bar"), lines.next());
983 /// assert_eq!(Some(""), lines.next());
984 /// assert_eq!(Some("baz"), lines.next());
985 ///
986 /// assert_eq!(None, lines.next());
987 /// ```
988 #[stable(feature = "rust1", since = "1.0.0")]
989 #[inline]
lines(&self) -> Lines<'_>990 pub fn lines(&self) -> Lines<'_> {
991 Lines(self.split_inclusive('\n').map(LinesMap))
992 }
993
994 /// An iterator over the lines of a string.
995 #[stable(feature = "rust1", since = "1.0.0")]
996 #[deprecated(since = "1.4.0", note = "use lines() instead now")]
997 #[inline]
998 #[allow(deprecated)]
lines_any(&self) -> LinesAny<'_>999 pub fn lines_any(&self) -> LinesAny<'_> {
1000 LinesAny(self.lines())
1001 }
1002
1003 /// Returns an iterator of `u16` over the string encoded as UTF-16.
1004 ///
1005 /// # Examples
1006 ///
1007 /// ```
1008 /// let text = "Zażółć gęślą jaźń";
1009 ///
1010 /// let utf8_len = text.len();
1011 /// let utf16_len = text.encode_utf16().count();
1012 ///
1013 /// assert!(utf16_len <= utf8_len);
1014 /// ```
1015 #[must_use = "this returns the encoded string as an iterator, \
1016 without modifying the original"]
1017 #[stable(feature = "encode_utf16", since = "1.8.0")]
encode_utf16(&self) -> EncodeUtf16<'_>1018 pub fn encode_utf16(&self) -> EncodeUtf16<'_> {
1019 EncodeUtf16 { chars: self.chars(), extra: 0 }
1020 }
1021
1022 /// Returns `true` if the given pattern matches a sub-slice of
1023 /// this string slice.
1024 ///
1025 /// Returns `false` if it does not.
1026 ///
1027 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1028 /// function or closure that determines if a character matches.
1029 ///
1030 /// [`char`]: prim@char
1031 /// [pattern]: self::pattern
1032 ///
1033 /// # Examples
1034 ///
1035 /// ```
1036 /// let bananas = "bananas";
1037 ///
1038 /// assert!(bananas.contains("nana"));
1039 /// assert!(!bananas.contains("apples"));
1040 /// ```
1041 #[stable(feature = "rust1", since = "1.0.0")]
1042 #[inline]
contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool1043 pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1044 pat.is_contained_in(self)
1045 }
1046
1047 /// Returns `true` if the given pattern matches a prefix of this
1048 /// string slice.
1049 ///
1050 /// Returns `false` if it does not.
1051 ///
1052 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1053 /// function or closure that determines if a character matches.
1054 ///
1055 /// [`char`]: prim@char
1056 /// [pattern]: self::pattern
1057 ///
1058 /// # Examples
1059 ///
1060 /// ```
1061 /// let bananas = "bananas";
1062 ///
1063 /// assert!(bananas.starts_with("bana"));
1064 /// assert!(!bananas.starts_with("nana"));
1065 /// ```
1066 #[stable(feature = "rust1", since = "1.0.0")]
starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool1067 pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1068 pat.is_prefix_of(self)
1069 }
1070
1071 /// Returns `true` if the given pattern matches a suffix of this
1072 /// string slice.
1073 ///
1074 /// Returns `false` if it does not.
1075 ///
1076 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1077 /// function or closure that determines if a character matches.
1078 ///
1079 /// [`char`]: prim@char
1080 /// [pattern]: self::pattern
1081 ///
1082 /// # Examples
1083 ///
1084 /// ```
1085 /// let bananas = "bananas";
1086 ///
1087 /// assert!(bananas.ends_with("anas"));
1088 /// assert!(!bananas.ends_with("nana"));
1089 /// ```
1090 #[stable(feature = "rust1", since = "1.0.0")]
ends_with<'a, P>(&'a self, pat: P) -> bool where P: Pattern<'a, Searcher: ReverseSearcher<'a>>,1091 pub fn ends_with<'a, P>(&'a self, pat: P) -> bool
1092 where
1093 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1094 {
1095 pat.is_suffix_of(self)
1096 }
1097
1098 /// Returns the byte index of the first character of this string slice that
1099 /// matches the pattern.
1100 ///
1101 /// Returns [`None`] if the pattern doesn't match.
1102 ///
1103 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1104 /// function or closure that determines if a character matches.
1105 ///
1106 /// [`char`]: prim@char
1107 /// [pattern]: self::pattern
1108 ///
1109 /// # Examples
1110 ///
1111 /// Simple patterns:
1112 ///
1113 /// ```
1114 /// let s = "Löwe 老虎 Léopard Gepardi";
1115 ///
1116 /// assert_eq!(s.find('L'), Some(0));
1117 /// assert_eq!(s.find('é'), Some(14));
1118 /// assert_eq!(s.find("pard"), Some(17));
1119 /// ```
1120 ///
1121 /// More complex patterns using point-free style and closures:
1122 ///
1123 /// ```
1124 /// let s = "Löwe 老虎 Léopard";
1125 ///
1126 /// assert_eq!(s.find(char::is_whitespace), Some(5));
1127 /// assert_eq!(s.find(char::is_lowercase), Some(1));
1128 /// assert_eq!(s.find(|c: char| c.is_whitespace() || c.is_lowercase()), Some(1));
1129 /// assert_eq!(s.find(|c: char| (c < 'o') && (c > 'a')), Some(4));
1130 /// ```
1131 ///
1132 /// Not finding the pattern:
1133 ///
1134 /// ```
1135 /// let s = "Löwe 老虎 Léopard";
1136 /// let x: &[_] = &['1', '2'];
1137 ///
1138 /// assert_eq!(s.find(x), None);
1139 /// ```
1140 #[stable(feature = "rust1", since = "1.0.0")]
1141 #[inline]
find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>1142 pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1143 pat.into_searcher(self).next_match().map(|(i, _)| i)
1144 }
1145
1146 /// Returns the byte index for the first character of the last match of the pattern in
1147 /// this string slice.
1148 ///
1149 /// Returns [`None`] if the pattern doesn't match.
1150 ///
1151 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1152 /// function or closure that determines if a character matches.
1153 ///
1154 /// [`char`]: prim@char
1155 /// [pattern]: self::pattern
1156 ///
1157 /// # Examples
1158 ///
1159 /// Simple patterns:
1160 ///
1161 /// ```
1162 /// let s = "Löwe 老虎 Léopard Gepardi";
1163 ///
1164 /// assert_eq!(s.rfind('L'), Some(13));
1165 /// assert_eq!(s.rfind('é'), Some(14));
1166 /// assert_eq!(s.rfind("pard"), Some(24));
1167 /// ```
1168 ///
1169 /// More complex patterns with closures:
1170 ///
1171 /// ```
1172 /// let s = "Löwe 老虎 Léopard";
1173 ///
1174 /// assert_eq!(s.rfind(char::is_whitespace), Some(12));
1175 /// assert_eq!(s.rfind(char::is_lowercase), Some(20));
1176 /// ```
1177 ///
1178 /// Not finding the pattern:
1179 ///
1180 /// ```
1181 /// let s = "Löwe 老虎 Léopard";
1182 /// let x: &[_] = &['1', '2'];
1183 ///
1184 /// assert_eq!(s.rfind(x), None);
1185 /// ```
1186 #[stable(feature = "rust1", since = "1.0.0")]
1187 #[inline]
rfind<'a, P>(&'a self, pat: P) -> Option<usize> where P: Pattern<'a, Searcher: ReverseSearcher<'a>>,1188 pub fn rfind<'a, P>(&'a self, pat: P) -> Option<usize>
1189 where
1190 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1191 {
1192 pat.into_searcher(self).next_match_back().map(|(i, _)| i)
1193 }
1194
1195 /// An iterator over substrings of this string slice, separated by
1196 /// characters matched by a pattern.
1197 ///
1198 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1199 /// function or closure that determines if a character matches.
1200 ///
1201 /// [`char`]: prim@char
1202 /// [pattern]: self::pattern
1203 ///
1204 /// # Iterator behavior
1205 ///
1206 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
1207 /// allows a reverse search and forward/reverse search yields the same
1208 /// elements. This is true for, e.g., [`char`], but not for `&str`.
1209 ///
1210 /// If the pattern allows a reverse search but its results might differ
1211 /// from a forward search, the [`rsplit`] method can be used.
1212 ///
1213 /// [`rsplit`]: str::rsplit
1214 ///
1215 /// # Examples
1216 ///
1217 /// Simple patterns:
1218 ///
1219 /// ```
1220 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
1221 /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
1222 ///
1223 /// let v: Vec<&str> = "".split('X').collect();
1224 /// assert_eq!(v, [""]);
1225 ///
1226 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
1227 /// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
1228 ///
1229 /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect();
1230 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
1231 ///
1232 /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect();
1233 /// assert_eq!(v, ["abc", "def", "ghi"]);
1234 ///
1235 /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect();
1236 /// assert_eq!(v, ["lion", "tiger", "leopard"]);
1237 /// ```
1238 ///
1239 /// If the pattern is a slice of chars, split on each occurrence of any of the characters:
1240 ///
1241 /// ```
1242 /// let v: Vec<&str> = "2020-11-03 23:59".split(&['-', ' ', ':', '@'][..]).collect();
1243 /// assert_eq!(v, ["2020", "11", "03", "23", "59"]);
1244 /// ```
1245 ///
1246 /// A more complex pattern, using a closure:
1247 ///
1248 /// ```
1249 /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect();
1250 /// assert_eq!(v, ["abc", "def", "ghi"]);
1251 /// ```
1252 ///
1253 /// If a string contains multiple contiguous separators, you will end up
1254 /// with empty strings in the output:
1255 ///
1256 /// ```
1257 /// let x = "||||a||b|c".to_string();
1258 /// let d: Vec<_> = x.split('|').collect();
1259 ///
1260 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
1261 /// ```
1262 ///
1263 /// Contiguous separators are separated by the empty string.
1264 ///
1265 /// ```
1266 /// let x = "(///)".to_string();
1267 /// let d: Vec<_> = x.split('/').collect();
1268 ///
1269 /// assert_eq!(d, &["(", "", "", ")"]);
1270 /// ```
1271 ///
1272 /// Separators at the start or end of a string are neighbored
1273 /// by empty strings.
1274 ///
1275 /// ```
1276 /// let d: Vec<_> = "010".split("0").collect();
1277 /// assert_eq!(d, &["", "1", ""]);
1278 /// ```
1279 ///
1280 /// When the empty string is used as a separator, it separates
1281 /// every character in the string, along with the beginning
1282 /// and end of the string.
1283 ///
1284 /// ```
1285 /// let f: Vec<_> = "rust".split("").collect();
1286 /// assert_eq!(f, &["", "r", "u", "s", "t", ""]);
1287 /// ```
1288 ///
1289 /// Contiguous separators can lead to possibly surprising behavior
1290 /// when whitespace is used as the separator. This code is correct:
1291 ///
1292 /// ```
/// let x = "    a  b c".to_string();
1294 /// let d: Vec<_> = x.split(' ').collect();
1295 ///
1296 /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
1297 /// ```
1298 ///
1299 /// It does _not_ give you:
1300 ///
1301 /// ```,ignore
1302 /// assert_eq!(d, &["a", "b", "c"]);
1303 /// ```
1304 ///
1305 /// Use [`split_whitespace`] for this behavior.
1306 ///
1307 /// [`split_whitespace`]: str::split_whitespace
1308 #[stable(feature = "rust1", since = "1.0.0")]
1309 #[inline]
split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>1310 pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
1311 Split(SplitInternal {
1312 start: 0,
1313 end: self.len(),
1314 matcher: pat.into_searcher(self),
1315 allow_trailing_empty: true,
1316 finished: false,
1317 })
1318 }
1319
1320 /// An iterator over substrings of this string slice, separated by
1321 /// characters matched by a pattern. Differs from the iterator produced by
1322 /// `split` in that `split_inclusive` leaves the matched part as the
1323 /// terminator of the substring.
1324 ///
1325 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1326 /// function or closure that determines if a character matches.
1327 ///
1328 /// [`char`]: prim@char
1329 /// [pattern]: self::pattern
1330 ///
1331 /// # Examples
1332 ///
1333 /// ```
1334 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
1335 /// .split_inclusive('\n').collect();
1336 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
1337 /// ```
1338 ///
1339 /// If the last element of the string is matched,
1340 /// that element will be considered the terminator of the preceding substring.
1341 /// That substring will be the last item returned by the iterator.
1342 ///
1343 /// ```
1344 /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n"
1345 /// .split_inclusive('\n').collect();
1346 /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb.\n"]);
1347 /// ```
1348 #[stable(feature = "split_inclusive", since = "1.51.0")]
1349 #[inline]
split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P>1350 pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> {
1351 SplitInclusive(SplitInternal {
1352 start: 0,
1353 end: self.len(),
1354 matcher: pat.into_searcher(self),
1355 allow_trailing_empty: false,
1356 finished: false,
1357 })
1358 }
1359
1360 /// An iterator over substrings of the given string slice, separated by
1361 /// characters matched by a pattern and yielded in reverse order.
1362 ///
1363 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1364 /// function or closure that determines if a character matches.
1365 ///
1366 /// [`char`]: prim@char
1367 /// [pattern]: self::pattern
1368 ///
1369 /// # Iterator behavior
1370 ///
1371 /// The returned iterator requires that the pattern supports a reverse
1372 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
1373 /// search yields the same elements.
1374 ///
1375 /// For iterating from the front, the [`split`] method can be used.
1376 ///
1377 /// [`split`]: str::split
1378 ///
1379 /// # Examples
1380 ///
1381 /// Simple patterns:
1382 ///
1383 /// ```
1384 /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect();
1385 /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]);
1386 ///
1387 /// let v: Vec<&str> = "".rsplit('X').collect();
1388 /// assert_eq!(v, [""]);
1389 ///
1390 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect();
1391 /// assert_eq!(v, ["leopard", "tiger", "", "lion"]);
1392 ///
1393 /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect();
1394 /// assert_eq!(v, ["leopard", "tiger", "lion"]);
1395 /// ```
1396 ///
1397 /// A more complex pattern, using a closure:
1398 ///
1399 /// ```
1400 /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect();
1401 /// assert_eq!(v, ["ghi", "def", "abc"]);
1402 /// ```
1403 #[stable(feature = "rust1", since = "1.0.0")]
1404 #[inline]
rsplit<'a, P>(&'a self, pat: P) -> RSplit<'a, P> where P: Pattern<'a, Searcher: ReverseSearcher<'a>>,1405 pub fn rsplit<'a, P>(&'a self, pat: P) -> RSplit<'a, P>
1406 where
1407 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1408 {
1409 RSplit(self.split(pat).0)
1410 }
1411
1412 /// An iterator over substrings of the given string slice, separated by
1413 /// characters matched by a pattern.
1414 ///
1415 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1416 /// function or closure that determines if a character matches.
1417 ///
1418 /// [`char`]: prim@char
1419 /// [pattern]: self::pattern
1420 ///
1421 /// Equivalent to [`split`], except that the trailing substring
1422 /// is skipped if empty.
1423 ///
1424 /// [`split`]: str::split
1425 ///
1426 /// This method can be used for string data that is _terminated_,
1427 /// rather than _separated_ by a pattern.
1428 ///
1429 /// # Iterator behavior
1430 ///
1431 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
1432 /// allows a reverse search and forward/reverse search yields the same
1433 /// elements. This is true for, e.g., [`char`], but not for `&str`.
1434 ///
1435 /// If the pattern allows a reverse search but its results might differ
1436 /// from a forward search, the [`rsplit_terminator`] method can be used.
1437 ///
1438 /// [`rsplit_terminator`]: str::rsplit_terminator
1439 ///
1440 /// # Examples
1441 ///
1442 /// ```
1443 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
1444 /// assert_eq!(v, ["A", "B"]);
1445 ///
1446 /// let v: Vec<&str> = "A..B..".split_terminator(".").collect();
1447 /// assert_eq!(v, ["A", "", "B", ""]);
1448 ///
1449 /// let v: Vec<&str> = "A.B:C.D".split_terminator(&['.', ':'][..]).collect();
1450 /// assert_eq!(v, ["A", "B", "C", "D"]);
1451 /// ```
1452 #[stable(feature = "rust1", since = "1.0.0")]
1453 #[inline]
split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>1454 pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
1455 SplitTerminator(SplitInternal { allow_trailing_empty: false, ..self.split(pat).0 })
1456 }
1457
1458 /// An iterator over substrings of `self`, separated by characters
1459 /// matched by a pattern and yielded in reverse order.
1460 ///
1461 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1462 /// function or closure that determines if a character matches.
1463 ///
1464 /// [`char`]: prim@char
1465 /// [pattern]: self::pattern
1466 ///
1467 /// Equivalent to [`split`], except that the trailing substring is
1468 /// skipped if empty.
1469 ///
1470 /// [`split`]: str::split
1471 ///
1472 /// This method can be used for string data that is _terminated_,
1473 /// rather than _separated_ by a pattern.
1474 ///
1475 /// # Iterator behavior
1476 ///
1477 /// The returned iterator requires that the pattern supports a
1478 /// reverse search, and it will be double ended if a forward/reverse
1479 /// search yields the same elements.
1480 ///
1481 /// For iterating from the front, the [`split_terminator`] method can be
1482 /// used.
1483 ///
1484 /// [`split_terminator`]: str::split_terminator
1485 ///
1486 /// # Examples
1487 ///
1488 /// ```
1489 /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect();
1490 /// assert_eq!(v, ["B", "A"]);
1491 ///
1492 /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect();
1493 /// assert_eq!(v, ["", "B", "", "A"]);
1494 ///
1495 /// let v: Vec<&str> = "A.B:C.D".rsplit_terminator(&['.', ':'][..]).collect();
1496 /// assert_eq!(v, ["D", "C", "B", "A"]);
1497 /// ```
1498 #[stable(feature = "rust1", since = "1.0.0")]
1499 #[inline]
rsplit_terminator<'a, P>(&'a self, pat: P) -> RSplitTerminator<'a, P> where P: Pattern<'a, Searcher: ReverseSearcher<'a>>,1500 pub fn rsplit_terminator<'a, P>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1501 where
1502 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1503 {
1504 RSplitTerminator(self.split_terminator(pat).0)
1505 }
1506
1507 /// An iterator over substrings of the given string slice, separated by a
1508 /// pattern, restricted to returning at most `n` items.
1509 ///
1510 /// If `n` substrings are returned, the last substring (the `n`th substring)
1511 /// will contain the remainder of the string.
1512 ///
1513 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1514 /// function or closure that determines if a character matches.
1515 ///
1516 /// [`char`]: prim@char
1517 /// [pattern]: self::pattern
1518 ///
1519 /// # Iterator behavior
1520 ///
1521 /// The returned iterator will not be double ended, because it is
1522 /// not efficient to support.
1523 ///
1524 /// If the pattern allows a reverse search, the [`rsplitn`] method can be
1525 /// used.
1526 ///
1527 /// [`rsplitn`]: str::rsplitn
1528 ///
1529 /// # Examples
1530 ///
1531 /// Simple patterns:
1532 ///
1533 /// ```
1534 /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect();
1535 /// assert_eq!(v, ["Mary", "had", "a little lambda"]);
1536 ///
1537 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect();
1538 /// assert_eq!(v, ["lion", "", "tigerXleopard"]);
1539 ///
1540 /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect();
1541 /// assert_eq!(v, ["abcXdef"]);
1542 ///
1543 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
1544 /// assert_eq!(v, [""]);
1545 /// ```
1546 ///
1547 /// A more complex pattern, using a closure:
1548 ///
1549 /// ```
1550 /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect();
1551 /// assert_eq!(v, ["abc", "defXghi"]);
1552 /// ```
1553 #[stable(feature = "rust1", since = "1.0.0")]
1554 #[inline]
splitn<'a, P: Pattern<'a>>(&'a self, n: usize, pat: P) -> SplitN<'a, P>1555 pub fn splitn<'a, P: Pattern<'a>>(&'a self, n: usize, pat: P) -> SplitN<'a, P> {
1556 SplitN(SplitNInternal { iter: self.split(pat).0, count: n })
1557 }
1558
1559 /// An iterator over substrings of this string slice, separated by a
1560 /// pattern, starting from the end of the string, restricted to returning
1561 /// at most `n` items.
1562 ///
1563 /// If `n` substrings are returned, the last substring (the `n`th substring)
1564 /// will contain the remainder of the string.
1565 ///
1566 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1567 /// function or closure that determines if a character matches.
1568 ///
1569 /// [`char`]: prim@char
1570 /// [pattern]: self::pattern
1571 ///
1572 /// # Iterator behavior
1573 ///
1574 /// The returned iterator will not be double ended, because it is not
1575 /// efficient to support.
1576 ///
1577 /// For splitting from the front, the [`splitn`] method can be used.
1578 ///
1579 /// [`splitn`]: str::splitn
1580 ///
1581 /// # Examples
1582 ///
1583 /// Simple patterns:
1584 ///
1585 /// ```
1586 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect();
1587 /// assert_eq!(v, ["lamb", "little", "Mary had a"]);
1588 ///
1589 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect();
1590 /// assert_eq!(v, ["leopard", "tiger", "lionX"]);
1591 ///
1592 /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect();
1593 /// assert_eq!(v, ["leopard", "lion::tiger"]);
1594 /// ```
1595 ///
1596 /// A more complex pattern, using a closure:
1597 ///
1598 /// ```
1599 /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect();
1600 /// assert_eq!(v, ["ghi", "abc1def"]);
1601 /// ```
1602 #[stable(feature = "rust1", since = "1.0.0")]
1603 #[inline]
rsplitn<'a, P>(&'a self, n: usize, pat: P) -> RSplitN<'a, P> where P: Pattern<'a, Searcher: ReverseSearcher<'a>>,1604 pub fn rsplitn<'a, P>(&'a self, n: usize, pat: P) -> RSplitN<'a, P>
1605 where
1606 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1607 {
1608 RSplitN(self.splitn(n, pat).0)
1609 }
1610
1611 /// Splits the string on the first occurrence of the specified delimiter and
1612 /// returns prefix before delimiter and suffix after delimiter.
1613 ///
1614 /// # Examples
1615 ///
1616 /// ```
1617 /// assert_eq!("cfg".split_once('='), None);
1618 /// assert_eq!("cfg=".split_once('='), Some(("cfg", "")));
1619 /// assert_eq!("cfg=foo".split_once('='), Some(("cfg", "foo")));
1620 /// assert_eq!("cfg=foo=bar".split_once('='), Some(("cfg", "foo=bar")));
1621 /// ```
1622 #[stable(feature = "str_split_once", since = "1.52.0")]
1623 #[inline]
split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)>1624 pub fn split_once<'a, P: Pattern<'a>>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)> {
1625 let (start, end) = delimiter.into_searcher(self).next_match()?;
1626 // SAFETY: `Searcher` is known to return valid indices.
1627 unsafe { Some((self.get_unchecked(..start), self.get_unchecked(end..))) }
1628 }
1629
1630 /// Splits the string on the last occurrence of the specified delimiter and
1631 /// returns prefix before delimiter and suffix after delimiter.
1632 ///
1633 /// # Examples
1634 ///
1635 /// ```
1636 /// assert_eq!("cfg".rsplit_once('='), None);
1637 /// assert_eq!("cfg=foo".rsplit_once('='), Some(("cfg", "foo")));
1638 /// assert_eq!("cfg=foo=bar".rsplit_once('='), Some(("cfg=foo", "bar")));
1639 /// ```
1640 #[stable(feature = "str_split_once", since = "1.52.0")]
1641 #[inline]
rsplit_once<'a, P>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)> where P: Pattern<'a, Searcher: ReverseSearcher<'a>>,1642 pub fn rsplit_once<'a, P>(&'a self, delimiter: P) -> Option<(&'a str, &'a str)>
1643 where
1644 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1645 {
1646 let (start, end) = delimiter.into_searcher(self).next_match_back()?;
1647 // SAFETY: `Searcher` is known to return valid indices.
1648 unsafe { Some((self.get_unchecked(..start), self.get_unchecked(end..))) }
1649 }
1650
1651 /// An iterator over the disjoint matches of a pattern within the given string
1652 /// slice.
1653 ///
1654 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1655 /// function or closure that determines if a character matches.
1656 ///
1657 /// [`char`]: prim@char
1658 /// [pattern]: self::pattern
1659 ///
1660 /// # Iterator behavior
1661 ///
1662 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
1663 /// allows a reverse search and forward/reverse search yields the same
1664 /// elements. This is true for, e.g., [`char`], but not for `&str`.
1665 ///
1666 /// If the pattern allows a reverse search but its results might differ
1667 /// from a forward search, the [`rmatches`] method can be used.
1668 ///
/// [`rmatches`]: str::rmatches
1670 ///
1671 /// # Examples
1672 ///
1673 /// ```
1674 /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
1675 /// assert_eq!(v, ["abc", "abc", "abc"]);
1676 ///
1677 /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect();
1678 /// assert_eq!(v, ["1", "2", "3"]);
1679 /// ```
1680 #[stable(feature = "str_matches", since = "1.2.0")]
1681 #[inline]
matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>1682 pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
1683 Matches(MatchesInternal(pat.into_searcher(self)))
1684 }
1685
1686 /// An iterator over the disjoint matches of a pattern within this string slice,
1687 /// yielded in reverse order.
1688 ///
1689 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1690 /// function or closure that determines if a character matches.
1691 ///
1692 /// [`char`]: prim@char
1693 /// [pattern]: self::pattern
1694 ///
1695 /// # Iterator behavior
1696 ///
1697 /// The returned iterator requires that the pattern supports a reverse
1698 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
1699 /// search yields the same elements.
1700 ///
1701 /// For iterating from the front, the [`matches`] method can be used.
1702 ///
1703 /// [`matches`]: str::matches
1704 ///
1705 /// # Examples
1706 ///
1707 /// ```
1708 /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect();
1709 /// assert_eq!(v, ["abc", "abc", "abc"]);
1710 ///
1711 /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect();
1712 /// assert_eq!(v, ["3", "2", "1"]);
1713 /// ```
1714 #[stable(feature = "str_matches", since = "1.2.0")]
1715 #[inline]
rmatches<'a, P>(&'a self, pat: P) -> RMatches<'a, P> where P: Pattern<'a, Searcher: ReverseSearcher<'a>>,1716 pub fn rmatches<'a, P>(&'a self, pat: P) -> RMatches<'a, P>
1717 where
1718 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1719 {
1720 RMatches(self.matches(pat).0)
1721 }
1722
1723 /// An iterator over the disjoint matches of a pattern within this string
1724 /// slice as well as the index that the match starts at.
1725 ///
1726 /// For matches of `pat` within `self` that overlap, only the indices
1727 /// corresponding to the first match are returned.
1728 ///
1729 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1730 /// function or closure that determines if a character matches.
1731 ///
1732 /// [`char`]: prim@char
1733 /// [pattern]: self::pattern
1734 ///
1735 /// # Iterator behavior
1736 ///
1737 /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern
1738 /// allows a reverse search and forward/reverse search yields the same
1739 /// elements. This is true for, e.g., [`char`], but not for `&str`.
1740 ///
1741 /// If the pattern allows a reverse search but its results might differ
1742 /// from a forward search, the [`rmatch_indices`] method can be used.
1743 ///
1744 /// [`rmatch_indices`]: str::rmatch_indices
1745 ///
1746 /// # Examples
1747 ///
1748 /// ```
1749 /// let v: Vec<_> = "abcXXXabcYYYabc".match_indices("abc").collect();
1750 /// assert_eq!(v, [(0, "abc"), (6, "abc"), (12, "abc")]);
1751 ///
1752 /// let v: Vec<_> = "1abcabc2".match_indices("abc").collect();
1753 /// assert_eq!(v, [(1, "abc"), (4, "abc")]);
1754 ///
1755 /// let v: Vec<_> = "ababa".match_indices("aba").collect();
1756 /// assert_eq!(v, [(0, "aba")]); // only the first `aba`
1757 /// ```
1758 #[stable(feature = "str_match_indices", since = "1.5.0")]
1759 #[inline]
match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>1760 pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
1761 MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
1762 }
1763
1764 /// An iterator over the disjoint matches of a pattern within `self`,
1765 /// yielded in reverse order along with the index of the match.
1766 ///
1767 /// For matches of `pat` within `self` that overlap, only the indices
1768 /// corresponding to the last match are returned.
1769 ///
1770 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
1771 /// function or closure that determines if a character matches.
1772 ///
1773 /// [`char`]: prim@char
1774 /// [pattern]: self::pattern
1775 ///
1776 /// # Iterator behavior
1777 ///
1778 /// The returned iterator requires that the pattern supports a reverse
1779 /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse
1780 /// search yields the same elements.
1781 ///
1782 /// For iterating from the front, the [`match_indices`] method can be used.
1783 ///
1784 /// [`match_indices`]: str::match_indices
1785 ///
1786 /// # Examples
1787 ///
1788 /// ```
1789 /// let v: Vec<_> = "abcXXXabcYYYabc".rmatch_indices("abc").collect();
1790 /// assert_eq!(v, [(12, "abc"), (6, "abc"), (0, "abc")]);
1791 ///
1792 /// let v: Vec<_> = "1abcabc2".rmatch_indices("abc").collect();
1793 /// assert_eq!(v, [(4, "abc"), (1, "abc")]);
1794 ///
1795 /// let v: Vec<_> = "ababa".rmatch_indices("aba").collect();
1796 /// assert_eq!(v, [(2, "aba")]); // only the last `aba`
1797 /// ```
1798 #[stable(feature = "str_match_indices", since = "1.5.0")]
1799 #[inline]
rmatch_indices<'a, P>(&'a self, pat: P) -> RMatchIndices<'a, P> where P: Pattern<'a, Searcher: ReverseSearcher<'a>>,1800 pub fn rmatch_indices<'a, P>(&'a self, pat: P) -> RMatchIndices<'a, P>
1801 where
1802 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
1803 {
1804 RMatchIndices(self.match_indices(pat).0)
1805 }
1806
1807 /// Returns a string slice with leading and trailing whitespace removed.
1808 ///
1809 /// 'Whitespace' is defined according to the terms of the Unicode Derived
1810 /// Core Property `White_Space`, which includes newlines.
1811 ///
1812 /// # Examples
1813 ///
1814 /// ```
1815 /// let s = "\n Hello\tworld\t\n";
1816 ///
1817 /// assert_eq!("Hello\tworld", s.trim());
1818 /// ```
1819 #[inline]
1820 #[must_use = "this returns the trimmed string as a slice, \
1821 without modifying the original"]
1822 #[stable(feature = "rust1", since = "1.0.0")]
1823 #[cfg_attr(not(test), rustc_diagnostic_item = "str_trim")]
trim(&self) -> &str1824 pub fn trim(&self) -> &str {
1825 self.trim_matches(|c: char| c.is_whitespace())
1826 }
1827
1828 /// Returns a string slice with leading whitespace removed.
1829 ///
1830 /// 'Whitespace' is defined according to the terms of the Unicode Derived
1831 /// Core Property `White_Space`, which includes newlines.
1832 ///
1833 /// # Text directionality
1834 ///
/// A string is a sequence of bytes. `start` in this context means the first
/// position of that byte string; for a left-to-right language like English or
/// Russian, this will be the left side, and for right-to-left languages like
/// Arabic or Hebrew, this will be the right side.
1839 ///
1840 /// # Examples
1841 ///
1842 /// Basic usage:
1843 ///
1844 /// ```
1845 /// let s = "\n Hello\tworld\t\n";
1846 /// assert_eq!("Hello\tworld\t\n", s.trim_start());
1847 /// ```
1848 ///
1849 /// Directionality:
1850 ///
1851 /// ```
1852 /// let s = " English ";
1853 /// assert!(Some('E') == s.trim_start().chars().next());
1854 ///
1855 /// let s = " עברית ";
1856 /// assert!(Some('ע') == s.trim_start().chars().next());
1857 /// ```
1858 #[inline]
1859 #[must_use = "this returns the trimmed string as a new slice, \
1860 without modifying the original"]
1861 #[stable(feature = "trim_direction", since = "1.30.0")]
1862 #[cfg_attr(not(test), rustc_diagnostic_item = "str_trim_start")]
trim_start(&self) -> &str1863 pub fn trim_start(&self) -> &str {
1864 self.trim_start_matches(|c: char| c.is_whitespace())
1865 }
1866
1867 /// Returns a string slice with trailing whitespace removed.
1868 ///
1869 /// 'Whitespace' is defined according to the terms of the Unicode Derived
1870 /// Core Property `White_Space`, which includes newlines.
1871 ///
1872 /// # Text directionality
1873 ///
/// A string is a sequence of bytes. `end` in this context means the last
/// position of that byte string; for a left-to-right language like English or
/// Russian, this will be the right side, and for right-to-left languages like
/// Arabic or Hebrew, this will be the left side.
1878 ///
1879 /// # Examples
1880 ///
1881 /// Basic usage:
1882 ///
1883 /// ```
1884 /// let s = "\n Hello\tworld\t\n";
1885 /// assert_eq!("\n Hello\tworld", s.trim_end());
1886 /// ```
1887 ///
1888 /// Directionality:
1889 ///
1890 /// ```
1891 /// let s = " English ";
1892 /// assert!(Some('h') == s.trim_end().chars().rev().next());
1893 ///
1894 /// let s = " עברית ";
1895 /// assert!(Some('ת') == s.trim_end().chars().rev().next());
1896 /// ```
1897 #[inline]
1898 #[must_use = "this returns the trimmed string as a new slice, \
1899 without modifying the original"]
1900 #[stable(feature = "trim_direction", since = "1.30.0")]
1901 #[cfg_attr(not(test), rustc_diagnostic_item = "str_trim_end")]
trim_end(&self) -> &str1902 pub fn trim_end(&self) -> &str {
1903 self.trim_end_matches(|c: char| c.is_whitespace())
1904 }
1905
1906 /// Returns a string slice with leading whitespace removed.
1907 ///
1908 /// 'Whitespace' is defined according to the terms of the Unicode Derived
1909 /// Core Property `White_Space`.
1910 ///
1911 /// # Text directionality
1912 ///
1913 /// A string is a sequence of bytes. 'Left' in this context means the first
1914 /// position of that byte string; for a language like Arabic or Hebrew
1915 /// which are 'right to left' rather than 'left to right', this will be
1916 /// the _right_ side, not the left.
1917 ///
1918 /// # Examples
1919 ///
1920 /// Basic usage:
1921 ///
1922 /// ```
1923 /// let s = " Hello\tworld\t";
1924 ///
1925 /// assert_eq!("Hello\tworld\t", s.trim_left());
1926 /// ```
1927 ///
1928 /// Directionality:
1929 ///
1930 /// ```
1931 /// let s = " English";
1932 /// assert!(Some('E') == s.trim_left().chars().next());
1933 ///
1934 /// let s = " עברית";
1935 /// assert!(Some('ע') == s.trim_left().chars().next());
1936 /// ```
1937 #[must_use = "this returns the trimmed string as a new slice, \
1938 without modifying the original"]
1939 #[inline]
1940 #[stable(feature = "rust1", since = "1.0.0")]
1941 #[deprecated(since = "1.33.0", note = "superseded by `trim_start`", suggestion = "trim_start")]
trim_left(&self) -> &str1942 pub fn trim_left(&self) -> &str {
1943 self.trim_start()
1944 }
1945
1946 /// Returns a string slice with trailing whitespace removed.
1947 ///
1948 /// 'Whitespace' is defined according to the terms of the Unicode Derived
1949 /// Core Property `White_Space`.
1950 ///
1951 /// # Text directionality
1952 ///
1953 /// A string is a sequence of bytes. 'Right' in this context means the last
1954 /// position of that byte string; for a language like Arabic or Hebrew
1955 /// which are 'right to left' rather than 'left to right', this will be
1956 /// the _left_ side, not the right.
1957 ///
1958 /// # Examples
1959 ///
1960 /// Basic usage:
1961 ///
1962 /// ```
1963 /// let s = " Hello\tworld\t";
1964 ///
1965 /// assert_eq!(" Hello\tworld", s.trim_right());
1966 /// ```
1967 ///
1968 /// Directionality:
1969 ///
1970 /// ```
1971 /// let s = "English ";
1972 /// assert!(Some('h') == s.trim_right().chars().rev().next());
1973 ///
1974 /// let s = "עברית ";
1975 /// assert!(Some('ת') == s.trim_right().chars().rev().next());
1976 /// ```
1977 #[must_use = "this returns the trimmed string as a new slice, \
1978 without modifying the original"]
1979 #[inline]
1980 #[stable(feature = "rust1", since = "1.0.0")]
1981 #[deprecated(since = "1.33.0", note = "superseded by `trim_end`", suggestion = "trim_end")]
trim_right(&self) -> &str1982 pub fn trim_right(&self) -> &str {
1983 self.trim_end()
1984 }
1985
1986 /// Returns a string slice with all prefixes and suffixes that match a
1987 /// pattern repeatedly removed.
1988 ///
1989 /// The [pattern] can be a [`char`], a slice of [`char`]s, or a function
1990 /// or closure that determines if a character matches.
1991 ///
1992 /// [`char`]: prim@char
1993 /// [pattern]: self::pattern
1994 ///
1995 /// # Examples
1996 ///
1997 /// Simple patterns:
1998 ///
1999 /// ```
2000 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
2001 /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar");
2002 ///
2003 /// let x: &[_] = &['1', '2'];
2004 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
2005 /// ```
2006 ///
2007 /// A more complex pattern, using a closure:
2008 ///
2009 /// ```
2010 /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar");
2011 /// ```
2012 #[must_use = "this returns the trimmed string as a new slice, \
2013 without modifying the original"]
2014 #[stable(feature = "rust1", since = "1.0.0")]
trim_matches<'a, P>(&'a self, pat: P) -> &'a str where P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,2015 pub fn trim_matches<'a, P>(&'a self, pat: P) -> &'a str
2016 where
2017 P: Pattern<'a, Searcher: DoubleEndedSearcher<'a>>,
2018 {
2019 let mut i = 0;
2020 let mut j = 0;
2021 let mut matcher = pat.into_searcher(self);
2022 if let Some((a, b)) = matcher.next_reject() {
2023 i = a;
2024 j = b; // Remember earliest known match, correct it below if
2025 // last match is different
2026 }
2027 if let Some((_, b)) = matcher.next_reject_back() {
2028 j = b;
2029 }
2030 // SAFETY: `Searcher` is known to return valid indices.
2031 unsafe { self.get_unchecked(i..j) }
2032 }
2033
2034 /// Returns a string slice with all prefixes that match a pattern
2035 /// repeatedly removed.
2036 ///
2037 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
2038 /// function or closure that determines if a character matches.
2039 ///
2040 /// [`char`]: prim@char
2041 /// [pattern]: self::pattern
2042 ///
2043 /// # Text directionality
2044 ///
/// A string is a sequence of bytes. `start` in this context means the first
/// position of that byte string; for a left-to-right language like English or
/// Russian, this will be the left side, and for right-to-left languages like
/// Arabic or Hebrew, this will be the right side.
2049 ///
2050 /// # Examples
2051 ///
2052 /// ```
2053 /// assert_eq!("11foo1bar11".trim_start_matches('1'), "foo1bar11");
2054 /// assert_eq!("123foo1bar123".trim_start_matches(char::is_numeric), "foo1bar123");
2055 ///
2056 /// let x: &[_] = &['1', '2'];
2057 /// assert_eq!("12foo1bar12".trim_start_matches(x), "foo1bar12");
2058 /// ```
2059 #[must_use = "this returns the trimmed string as a new slice, \
2060 without modifying the original"]
2061 #[stable(feature = "trim_direction", since = "1.30.0")]
trim_start_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str2062 pub fn trim_start_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
2063 let mut i = self.len();
2064 let mut matcher = pat.into_searcher(self);
2065 if let Some((a, _)) = matcher.next_reject() {
2066 i = a;
2067 }
2068 // SAFETY: `Searcher` is known to return valid indices.
2069 unsafe { self.get_unchecked(i..self.len()) }
2070 }
2071
2072 /// Returns a string slice with the prefix removed.
2073 ///
/// If the string starts with the pattern `prefix`, returns the substring after the
/// prefix, wrapped in `Some`. Unlike `trim_start_matches`, this method removes the
/// prefix exactly once.
2076 ///
2077 /// If the string does not start with `prefix`, returns `None`.
2078 ///
2079 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
2080 /// function or closure that determines if a character matches.
2081 ///
2082 /// [`char`]: prim@char
2083 /// [pattern]: self::pattern
2084 ///
2085 /// # Examples
2086 ///
2087 /// ```
2088 /// assert_eq!("foo:bar".strip_prefix("foo:"), Some("bar"));
2089 /// assert_eq!("foo:bar".strip_prefix("bar"), None);
2090 /// assert_eq!("foofoo".strip_prefix("foo"), Some("foo"));
2091 /// ```
2092 #[must_use = "this returns the remaining substring as a new slice, \
2093 without modifying the original"]
2094 #[stable(feature = "str_strip", since = "1.45.0")]
strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str>2095 pub fn strip_prefix<'a, P: Pattern<'a>>(&'a self, prefix: P) -> Option<&'a str> {
2096 prefix.strip_prefix_of(self)
2097 }
2098
2099 /// Returns a string slice with the suffix removed.
2100 ///
2101 /// If the string ends with the pattern `suffix`, returns the substring before the suffix,
2102 /// wrapped in `Some`. Unlike `trim_end_matches`, this method removes the suffix exactly once.
2103 ///
2104 /// If the string does not end with `suffix`, returns `None`.
2105 ///
2106 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
2107 /// function or closure that determines if a character matches.
2108 ///
2109 /// [`char`]: prim@char
2110 /// [pattern]: self::pattern
2111 ///
2112 /// # Examples
2113 ///
2114 /// ```
2115 /// assert_eq!("bar:foo".strip_suffix(":foo"), Some("bar"));
2116 /// assert_eq!("bar:foo".strip_suffix("bar"), None);
2117 /// assert_eq!("foofoo".strip_suffix("foo"), Some("foo"));
2118 /// ```
2119 #[must_use = "this returns the remaining substring as a new slice, \
2120 without modifying the original"]
2121 #[stable(feature = "str_strip", since = "1.45.0")]
strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a str> where P: Pattern<'a>, <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,2122 pub fn strip_suffix<'a, P>(&'a self, suffix: P) -> Option<&'a str>
2123 where
2124 P: Pattern<'a>,
2125 <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
2126 {
2127 suffix.strip_suffix_of(self)
2128 }
2129
2130 /// Returns a string slice with all suffixes that match a pattern
2131 /// repeatedly removed.
2132 ///
2133 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
2134 /// function or closure that determines if a character matches.
2135 ///
2136 /// [`char`]: prim@char
2137 /// [pattern]: self::pattern
2138 ///
2139 /// # Text directionality
2140 ///
/// A string is a sequence of bytes. `end` in this context means the last
/// position of that byte string; for a left-to-right language like English or
/// Russian, this will be the right side, and for right-to-left languages like
/// Arabic or Hebrew, this will be the left side.
2145 ///
2146 /// # Examples
2147 ///
2148 /// Simple patterns:
2149 ///
2150 /// ```
2151 /// assert_eq!("11foo1bar11".trim_end_matches('1'), "11foo1bar");
2152 /// assert_eq!("123foo1bar123".trim_end_matches(char::is_numeric), "123foo1bar");
2153 ///
2154 /// let x: &[_] = &['1', '2'];
2155 /// assert_eq!("12foo1bar12".trim_end_matches(x), "12foo1bar");
2156 /// ```
2157 ///
2158 /// A more complex pattern, using a closure:
2159 ///
2160 /// ```
2161 /// assert_eq!("1fooX".trim_end_matches(|c| c == '1' || c == 'X'), "1foo");
2162 /// ```
2163 #[must_use = "this returns the trimmed string as a new slice, \
2164 without modifying the original"]
2165 #[stable(feature = "trim_direction", since = "1.30.0")]
trim_end_matches<'a, P>(&'a self, pat: P) -> &'a str where P: Pattern<'a, Searcher: ReverseSearcher<'a>>,2166 pub fn trim_end_matches<'a, P>(&'a self, pat: P) -> &'a str
2167 where
2168 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
2169 {
2170 let mut j = 0;
2171 let mut matcher = pat.into_searcher(self);
2172 if let Some((_, b)) = matcher.next_reject_back() {
2173 j = b;
2174 }
2175 // SAFETY: `Searcher` is known to return valid indices.
2176 unsafe { self.get_unchecked(0..j) }
2177 }
2178
2179 /// Returns a string slice with all prefixes that match a pattern
2180 /// repeatedly removed.
2181 ///
2182 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
2183 /// function or closure that determines if a character matches.
2184 ///
2185 /// [`char`]: prim@char
2186 /// [pattern]: self::pattern
2187 ///
2188 /// # Text directionality
2189 ///
2190 /// A string is a sequence of bytes. 'Left' in this context means the first
2191 /// position of that byte string; for a language like Arabic or Hebrew
2192 /// which are 'right to left' rather than 'left to right', this will be
2193 /// the _right_ side, not the left.
2194 ///
2195 /// # Examples
2196 ///
2197 /// ```
2198 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
2199 /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123");
2200 ///
2201 /// let x: &[_] = &['1', '2'];
2202 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
2203 /// ```
2204 #[stable(feature = "rust1", since = "1.0.0")]
2205 #[deprecated(
2206 since = "1.33.0",
2207 note = "superseded by `trim_start_matches`",
2208 suggestion = "trim_start_matches"
2209 )]
trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str2210 pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
2211 self.trim_start_matches(pat)
2212 }
2213
2214 /// Returns a string slice with all suffixes that match a pattern
2215 /// repeatedly removed.
2216 ///
2217 /// The [pattern] can be a `&str`, [`char`], a slice of [`char`]s, or a
2218 /// function or closure that determines if a character matches.
2219 ///
2220 /// [`char`]: prim@char
2221 /// [pattern]: self::pattern
2222 ///
2223 /// # Text directionality
2224 ///
2225 /// A string is a sequence of bytes. 'Right' in this context means the last
2226 /// position of that byte string; for a language like Arabic or Hebrew
2227 /// which are 'right to left' rather than 'left to right', this will be
2228 /// the _left_ side, not the right.
2229 ///
2230 /// # Examples
2231 ///
2232 /// Simple patterns:
2233 ///
2234 /// ```
2235 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
2236 /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar");
2237 ///
2238 /// let x: &[_] = &['1', '2'];
2239 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
2240 /// ```
2241 ///
2242 /// A more complex pattern, using a closure:
2243 ///
2244 /// ```
2245 /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo");
2246 /// ```
2247 #[stable(feature = "rust1", since = "1.0.0")]
2248 #[deprecated(
2249 since = "1.33.0",
2250 note = "superseded by `trim_end_matches`",
2251 suggestion = "trim_end_matches"
2252 )]
trim_right_matches<'a, P>(&'a self, pat: P) -> &'a str where P: Pattern<'a, Searcher: ReverseSearcher<'a>>,2253 pub fn trim_right_matches<'a, P>(&'a self, pat: P) -> &'a str
2254 where
2255 P: Pattern<'a, Searcher: ReverseSearcher<'a>>,
2256 {
2257 self.trim_end_matches(pat)
2258 }
2259
2260 /// Parses this string slice into another type.
2261 ///
2262 /// Because `parse` is so general, it can cause problems with type
2263 /// inference. As such, `parse` is one of the few times you'll see
2264 /// the syntax affectionately known as the 'turbofish': `::<>`. This
2265 /// helps the inference algorithm understand specifically which type
2266 /// you're trying to parse into.
2267 ///
2268 /// `parse` can parse into any type that implements the [`FromStr`] trait.
///
2270 ///
2271 /// # Errors
2272 ///
2273 /// Will return [`Err`] if it's not possible to parse this string slice into
2274 /// the desired type.
2275 ///
2276 /// [`Err`]: FromStr::Err
2277 ///
2278 /// # Examples
2279 ///
2280 /// Basic usage
2281 ///
2282 /// ```
2283 /// let four: u32 = "4".parse().unwrap();
2284 ///
2285 /// assert_eq!(4, four);
2286 /// ```
2287 ///
2288 /// Using the 'turbofish' instead of annotating `four`:
2289 ///
2290 /// ```
2291 /// let four = "4".parse::<u32>();
2292 ///
2293 /// assert_eq!(Ok(4), four);
2294 /// ```
2295 ///
2296 /// Failing to parse:
2297 ///
2298 /// ```
2299 /// let nope = "j".parse::<u32>();
2300 ///
2301 /// assert!(nope.is_err());
2302 /// ```
2303 #[inline]
2304 #[stable(feature = "rust1", since = "1.0.0")]
parse<F: FromStr>(&self) -> Result<F, F::Err>2305 pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
2306 FromStr::from_str(self)
2307 }
2308
2309 /// Checks if all characters in this string are within the ASCII range.
2310 ///
2311 /// # Examples
2312 ///
2313 /// ```
2314 /// let ascii = "hello!\n";
2315 /// let non_ascii = "Grüße, Jürgen ❤";
2316 ///
2317 /// assert!(ascii.is_ascii());
2318 /// assert!(!non_ascii.is_ascii());
2319 /// ```
2320 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2321 #[rustc_const_unstable(feature = "const_slice_is_ascii", issue = "111090")]
2322 #[must_use]
2323 #[inline]
is_ascii(&self) -> bool2324 pub const fn is_ascii(&self) -> bool {
2325 // We can treat each byte as character here: all multibyte characters
2326 // start with a byte that is not in the ASCII range, so we will stop
2327 // there already.
2328 self.as_bytes().is_ascii()
2329 }
2330
2331 /// If this string slice [`is_ascii`](Self::is_ascii), returns it as a slice
2332 /// of [ASCII characters](`ascii::Char`), otherwise returns `None`.
2333 #[unstable(feature = "ascii_char", issue = "110998")]
2334 #[must_use]
2335 #[inline]
as_ascii(&self) -> Option<&[ascii::Char]>2336 pub const fn as_ascii(&self) -> Option<&[ascii::Char]> {
2337 // Like in `is_ascii`, we can work on the bytes directly.
2338 self.as_bytes().as_ascii()
2339 }
2340
2341 /// Checks that two strings are an ASCII case-insensitive match.
2342 ///
2343 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
2344 /// but without allocating and copying temporaries.
2345 ///
2346 /// # Examples
2347 ///
2348 /// ```
2349 /// assert!("Ferris".eq_ignore_ascii_case("FERRIS"));
2350 /// assert!("Ferrös".eq_ignore_ascii_case("FERRöS"));
2351 /// assert!(!"Ferrös".eq_ignore_ascii_case("FERRÖS"));
2352 /// ```
2353 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2354 #[must_use]
2355 #[inline]
eq_ignore_ascii_case(&self, other: &str) -> bool2356 pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
2357 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
2358 }
2359
2360 /// Converts this string to its ASCII upper case equivalent in-place.
2361 ///
2362 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
2363 /// but non-ASCII letters are unchanged.
2364 ///
2365 /// To return a new uppercased value without modifying the existing one, use
2366 /// [`to_ascii_uppercase()`].
2367 ///
2368 /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
2369 ///
2370 /// # Examples
2371 ///
2372 /// ```
2373 /// let mut s = String::from("Grüße, Jürgen ❤");
2374 ///
2375 /// s.make_ascii_uppercase();
2376 ///
2377 /// assert_eq!("GRüßE, JüRGEN ❤", s);
2378 /// ```
2379 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2380 #[inline]
make_ascii_uppercase(&mut self)2381 pub fn make_ascii_uppercase(&mut self) {
2382 // SAFETY: changing ASCII letters only does not invalidate UTF-8.
2383 let me = unsafe { self.as_bytes_mut() };
2384 me.make_ascii_uppercase()
2385 }
2386
2387 /// Converts this string to its ASCII lower case equivalent in-place.
2388 ///
2389 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
2390 /// but non-ASCII letters are unchanged.
2391 ///
2392 /// To return a new lowercased value without modifying the existing one, use
2393 /// [`to_ascii_lowercase()`].
2394 ///
2395 /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
2396 ///
2397 /// # Examples
2398 ///
2399 /// ```
2400 /// let mut s = String::from("GRÜßE, JÜRGEN ❤");
2401 ///
2402 /// s.make_ascii_lowercase();
2403 ///
2404 /// assert_eq!("grÜße, jÜrgen ❤", s);
2405 /// ```
2406 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2407 #[inline]
make_ascii_lowercase(&mut self)2408 pub fn make_ascii_lowercase(&mut self) {
2409 // SAFETY: changing ASCII letters only does not invalidate UTF-8.
2410 let me = unsafe { self.as_bytes_mut() };
2411 me.make_ascii_lowercase()
2412 }
2413
2414 /// Return an iterator that escapes each char in `self` with [`char::escape_debug`].
2415 ///
2416 /// Note: only extended grapheme codepoints that begin the string will be
2417 /// escaped.
2418 ///
2419 /// # Examples
2420 ///
2421 /// As an iterator:
2422 ///
2423 /// ```
2424 /// for c in "❤\n!".escape_debug() {
2425 /// print!("{c}");
2426 /// }
2427 /// println!();
2428 /// ```
2429 ///
2430 /// Using `println!` directly:
2431 ///
2432 /// ```
2433 /// println!("{}", "❤\n!".escape_debug());
2434 /// ```
2435 ///
2436 ///
2437 /// Both are equivalent to:
2438 ///
2439 /// ```
2440 /// println!("❤\\n!");
2441 /// ```
2442 ///
2443 /// Using `to_string`:
2444 ///
2445 /// ```
2446 /// assert_eq!("❤\n!".escape_debug().to_string(), "❤\\n!");
2447 /// ```
2448 #[must_use = "this returns the escaped string as an iterator, \
2449 without modifying the original"]
2450 #[stable(feature = "str_escape", since = "1.34.0")]
escape_debug(&self) -> EscapeDebug<'_>2451 pub fn escape_debug(&self) -> EscapeDebug<'_> {
2452 let mut chars = self.chars();
2453 EscapeDebug {
2454 inner: chars
2455 .next()
2456 .map(|first| first.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL))
2457 .into_iter()
2458 .flatten()
2459 .chain(chars.flat_map(CharEscapeDebugContinue)),
2460 }
2461 }
2462
2463 /// Return an iterator that escapes each char in `self` with [`char::escape_default`].
2464 ///
2465 /// # Examples
2466 ///
2467 /// As an iterator:
2468 ///
2469 /// ```
2470 /// for c in "❤\n!".escape_default() {
2471 /// print!("{c}");
2472 /// }
2473 /// println!();
2474 /// ```
2475 ///
2476 /// Using `println!` directly:
2477 ///
2478 /// ```
2479 /// println!("{}", "❤\n!".escape_default());
2480 /// ```
2481 ///
2482 ///
2483 /// Both are equivalent to:
2484 ///
2485 /// ```
2486 /// println!("\\u{{2764}}\\n!");
2487 /// ```
2488 ///
2489 /// Using `to_string`:
2490 ///
2491 /// ```
2492 /// assert_eq!("❤\n!".escape_default().to_string(), "\\u{2764}\\n!");
2493 /// ```
2494 #[must_use = "this returns the escaped string as an iterator, \
2495 without modifying the original"]
2496 #[stable(feature = "str_escape", since = "1.34.0")]
escape_default(&self) -> EscapeDefault<'_>2497 pub fn escape_default(&self) -> EscapeDefault<'_> {
2498 EscapeDefault { inner: self.chars().flat_map(CharEscapeDefault) }
2499 }
2500
2501 /// Return an iterator that escapes each char in `self` with [`char::escape_unicode`].
2502 ///
2503 /// # Examples
2504 ///
2505 /// As an iterator:
2506 ///
2507 /// ```
2508 /// for c in "❤\n!".escape_unicode() {
2509 /// print!("{c}");
2510 /// }
2511 /// println!();
2512 /// ```
2513 ///
2514 /// Using `println!` directly:
2515 ///
2516 /// ```
2517 /// println!("{}", "❤\n!".escape_unicode());
2518 /// ```
2519 ///
2520 ///
2521 /// Both are equivalent to:
2522 ///
2523 /// ```
2524 /// println!("\\u{{2764}}\\u{{a}}\\u{{21}}");
2525 /// ```
2526 ///
2527 /// Using `to_string`:
2528 ///
2529 /// ```
2530 /// assert_eq!("❤\n!".escape_unicode().to_string(), "\\u{2764}\\u{a}\\u{21}");
2531 /// ```
2532 #[must_use = "this returns the escaped string as an iterator, \
2533 without modifying the original"]
2534 #[stable(feature = "str_escape", since = "1.34.0")]
escape_unicode(&self) -> EscapeUnicode<'_>2535 pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
2536 EscapeUnicode { inner: self.chars().flat_map(CharEscapeUnicode) }
2537 }
2538 }
2539
2540 #[stable(feature = "rust1", since = "1.0.0")]
2541 impl AsRef<[u8]> for str {
2542 #[inline]
as_ref(&self) -> &[u8]2543 fn as_ref(&self) -> &[u8] {
2544 self.as_bytes()
2545 }
2546 }
2547
#[stable(feature = "rust1", since = "1.0.0")]
impl Default for &str {
    /// Creates an empty str.
    ///
    /// The returned value is the string literal `""`.
    #[inline]
    fn default() -> Self {
        ""
    }
}
2556
2557 #[stable(feature = "default_mut_str", since = "1.28.0")]
2558 impl Default for &mut str {
2559 /// Creates an empty mutable str
2560 #[inline]
default() -> Self2561 fn default() -> Self {
2562 // SAFETY: The empty string is valid UTF-8.
2563 unsafe { from_utf8_unchecked_mut(&mut []) }
2564 }
2565 }
2566
// Named, zero-sized function types. Each entry pairs a struct name with a
// closure-like body so the resulting callable has a type that can be written
// down (for example in an iterator's field types).
impl_fn_for_zst! {
    /// A nameable, cloneable fn type
    #[derive(Clone)]
    struct LinesMap impl<'a> Fn = |line: &'a str| -> &'a str {
        // Remove one trailing "\n"; if there was one, also remove one "\r"
        // directly before it. A line without a trailing "\n" is returned as is.
        let Some(line) = line.strip_suffix('\n') else { return line };
        let Some(line) = line.strip_suffix('\r') else { return line };
        line
    };

    // Debug-escapes a char with grapheme-extend escaping switched off and
    // both quote kinds escaped.
    #[derive(Clone)]
    struct CharEscapeDebugContinue impl Fn = |c: char| -> char::EscapeDebug {
        c.escape_debug_ext(EscapeDebugExtArgs {
            escape_grapheme_extended: false,
            escape_single_quote: true,
            escape_double_quote: true
        })
    };

    // Forwards to `char::escape_unicode`.
    #[derive(Clone)]
    struct CharEscapeUnicode impl Fn = |c: char| -> char::EscapeUnicode {
        c.escape_unicode()
    };
    // Forwards to `char::escape_default`.
    #[derive(Clone)]
    struct CharEscapeDefault impl Fn = |c: char| -> char::EscapeDefault {
        c.escape_default()
    };

    // Predicate: is this char whitespace?
    #[derive(Clone)]
    struct IsWhitespace impl Fn = |c: char| -> bool {
        c.is_whitespace()
    };

    // Predicate: is this byte ASCII whitespace?
    #[derive(Clone)]
    struct IsAsciiWhitespace impl Fn = |byte: &u8| -> bool {
        byte.is_ascii_whitespace()
    };

    // Predicate: is the referenced string slice non-empty?
    #[derive(Clone)]
    struct IsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b str| -> bool {
        !s.is_empty()
    };

    // Predicate: is the referenced byte slice non-empty?
    #[derive(Clone)]
    struct BytesIsNotEmpty impl<'a, 'b> Fn = |s: &'a &'b [u8]| -> bool {
        !s.is_empty()
    };

    // Reinterprets bytes as a `str` without any validation.
    #[derive(Clone)]
    struct UnsafeBytesToStr impl<'a> Fn = |bytes: &'a [u8]| -> &'a str {
        // SAFETY: not established here. Nothing in this body checks that
        // `bytes` is valid UTF-8, so every user of this type must only pass
        // it byte slices already known to be valid UTF-8.
        // NOTE(review): presumably only fed subslices of an existing `str`
        // - confirm at the use sites.
        unsafe { from_utf8_unchecked(bytes) }
    };
}
2620
// Negative impl: `&str` is declared to never implement `Error`.
// This is required to make `impl From<&str> for Box<dyn Error>` and
// `impl<E> From<E> for Box<dyn Error>` not overlap.
#[stable(feature = "rust1", since = "1.0.0")]
impl !crate::error::Error for &str {}
2624