1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2023 Google LLC. All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file or at 6 // https://developers.google.com/open-source/licenses/bsd 7 8 //! Items specific to `bytes` and `string` fields. 9 #![allow(dead_code)] 10 #![allow(unused)] 11 12 use crate::__internal::{Private, SealedInternal}; 13 use crate::__runtime::{InnerProtoString, PtrAndLen, RawMessage}; 14 use crate::{ 15 utf8::Utf8Chunks, AsView, IntoProxied, IntoView, Mut, MutProxied, MutProxy, Optional, Proxied, 16 Proxy, View, ViewProxy, 17 }; 18 use std::borrow::Cow; 19 use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd}; 20 use std::convert::{AsMut, AsRef}; 21 use std::ffi::{OsStr, OsString}; 22 use std::fmt; 23 use std::hash::{Hash, Hasher}; 24 use std::iter; 25 use std::ops::{Deref, DerefMut}; 26 use std::ptr; 27 use std::rc::Rc; 28 use std::sync::Arc; 29 30 pub struct ProtoBytes { 31 pub(crate) inner: InnerProtoString, 32 } 33 34 impl ProtoBytes { 35 // Returns the kernel-specific container. This method is private in spirit and 36 // must not be called by a user. 37 #[doc(hidden)] into_inner(self, _private: Private) -> InnerProtoString38 pub fn into_inner(self, _private: Private) -> InnerProtoString { 39 self.inner 40 } 41 42 #[doc(hidden)] from_inner(_private: Private, inner: InnerProtoString) -> ProtoBytes43 pub fn from_inner(_private: Private, inner: InnerProtoString) -> ProtoBytes { 44 Self { inner } 45 } 46 as_view(&self) -> &[u8]47 pub fn as_view(&self) -> &[u8] { 48 self.inner.as_bytes() 49 } 50 } 51 52 impl AsRef<[u8]> for ProtoBytes { as_ref(&self) -> &[u8]53 fn as_ref(&self) -> &[u8] { 54 self.inner.as_bytes() 55 } 56 } 57 58 impl From<&[u8]> for ProtoBytes { from(v: &[u8]) -> ProtoBytes59 fn from(v: &[u8]) -> ProtoBytes { 60 ProtoBytes { inner: InnerProtoString::from(v) } 61 } 62 } 63 64 impl<const N: usize> From<&[u8; N]> for ProtoBytes { from(v: &[u8; N]) -> ProtoBytes65 fn from(v: &[u8; N]) -> ProtoBytes { 66 ProtoBytes { inner: InnerProtoString::from(v.as_ref()) } 67 } 68 } 69 70 impl SealedInternal for ProtoBytes {} 71 72 impl Proxied for ProtoBytes { 73 type View<'msg> = &'msg [u8]; 74 } 75 76 impl AsView for ProtoBytes { 77 type Proxied = Self; 78 as_view(&self) -> &[u8]79 fn as_view(&self) -> &[u8] { 80 self.as_view() 81 } 82 } 83 84 impl IntoProxied<ProtoBytes> for &[u8] { into_proxied(self, _private: Private) -> ProtoBytes85 fn into_proxied(self, _private: Private) -> ProtoBytes { 86 ProtoBytes::from(self) 87 } 88 } 89 90 impl<const N: usize> IntoProxied<ProtoBytes> for &[u8; N] { into_proxied(self, _private: Private) -> ProtoBytes91 fn into_proxied(self, _private: Private) -> ProtoBytes { 92 ProtoBytes::from(self.as_ref()) 93 } 94 } 95 96 impl IntoProxied<ProtoBytes> for Vec<u8> { into_proxied(self, _private: Private) -> ProtoBytes97 fn into_proxied(self, _private: Private) -> ProtoBytes { 98 ProtoBytes::from(AsRef::<[u8]>::as_ref(&self)) 99 } 100 } 101 102 impl IntoProxied<ProtoBytes> for &Vec<u8> { into_proxied(self, _private: Private) -> ProtoBytes103 fn into_proxied(self, _private: Private) -> ProtoBytes { 104 ProtoBytes::from(AsRef::<[u8]>::as_ref(self)) 105 } 106 } 107 108 impl IntoProxied<ProtoBytes> for Box<[u8]> { into_proxied(self, _private: Private) -> ProtoBytes109 fn into_proxied(self, _private: Private) -> ProtoBytes { 110 ProtoBytes::from(AsRef::<[u8]>::as_ref(&self)) 111 } 112 } 113 114 impl IntoProxied<ProtoBytes> for Cow<'_, [u8]> { into_proxied(self, _private: Private) -> ProtoBytes115 fn into_proxied(self, _private: Private) -> ProtoBytes { 116 ProtoBytes::from(AsRef::<[u8]>::as_ref(&self)) 117 } 118 } 119 120 impl IntoProxied<ProtoBytes> for Rc<[u8]> { into_proxied(self, _private: Private) -> ProtoBytes121 fn into_proxied(self, _private: Private) -> ProtoBytes { 122 ProtoBytes::from(AsRef::<[u8]>::as_ref(&self)) 123 } 124 } 125 126 impl IntoProxied<ProtoBytes> for Arc<[u8]> { into_proxied(self, _private: Private) -> ProtoBytes127 fn into_proxied(self, _private: Private) -> ProtoBytes { 128 ProtoBytes::from(AsRef::<[u8]>::as_ref(&self)) 129 } 130 } 131 132 impl SealedInternal for &[u8] {} 133 134 impl<'msg> Proxy<'msg> for &'msg [u8] {} 135 136 impl AsView for &[u8] { 137 type Proxied = ProtoBytes; 138 as_view(&self) -> &[u8]139 fn as_view(&self) -> &[u8] { 140 self 141 } 142 } 143 144 impl<'msg> IntoView<'msg> for &'msg [u8] { into_view<'shorter>(self) -> &'shorter [u8] where 'msg: 'shorter,145 fn into_view<'shorter>(self) -> &'shorter [u8] 146 where 147 'msg: 'shorter, 148 { 149 self 150 } 151 } 152 153 impl<'msg> ViewProxy<'msg> for &'msg [u8] {} 154 155 /// The bytes were not valid UTF-8. 156 #[derive(Debug, PartialEq)] 157 pub struct Utf8Error { 158 pub(crate) inner: std::str::Utf8Error, 159 } 160 impl std::fmt::Display for Utf8Error { fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result161 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 162 self.inner.fmt(f) 163 } 164 } 165 166 impl std::error::Error for Utf8Error {} 167 168 impl From<std::str::Utf8Error> for Utf8Error { from(inner: std::str::Utf8Error) -> Utf8Error169 fn from(inner: std::str::Utf8Error) -> Utf8Error { 170 Utf8Error { inner } 171 } 172 } 173 174 /// An owned type representing protobuf `string` field's contents. 175 /// 176 /// # UTF-8 177 /// 178 /// Protobuf [docs] state that a `string` field contains UTF-8 encoded text. 179 /// However, not every runtime enforces this, and the Rust runtime is designed 180 /// to integrate with other runtimes with FFI, like C++. 181 /// 182 /// `ProtoString` represents a string type that is expected to contain valid 183 /// UTF-8. However, `ProtoString` is not validated, so users must 184 /// call [`ProtoString::to_string`] to perform a (possibly runtime-elided) UTF-8 185 /// validation check. This validation should rarely fail in pure Rust programs, 186 /// but is necessary to prevent UB when interacting with C++, or other languages 187 /// with looser restrictions. 188 /// 189 /// 190 /// # `Display` and `ToString` 191 /// `ProtoString` is ordinarily UTF-8 and so implements `Display`. If there are 192 /// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT 193 /// CHARACTER`]. Because anything implementing `Display` also implements 194 /// `ToString`, `ProtoString::to_string()` is equivalent to 195 /// `String::from_utf8_lossy(proto_string.as_bytes()).into_owned()`. 196 /// 197 /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER 198 pub struct ProtoString { 199 pub(crate) inner: InnerProtoString, 200 } 201 202 impl ProtoString { as_view(&self) -> &ProtoStr203 pub fn as_view(&self) -> &ProtoStr { 204 unsafe { ProtoStr::from_utf8_unchecked(self.as_bytes()) } 205 } 206 as_bytes(&self) -> &[u8]207 pub fn as_bytes(&self) -> &[u8] { 208 self.inner.as_bytes() 209 } 210 211 // Returns the kernel-specific container. This method is private in spirit and 212 // must not be called by a user. 213 #[doc(hidden)] into_inner(self, _private: Private) -> InnerProtoString214 pub fn into_inner(self, _private: Private) -> InnerProtoString { 215 self.inner 216 } 217 218 #[doc(hidden)] from_inner(_private: Private, inner: InnerProtoString) -> ProtoString219 pub fn from_inner(_private: Private, inner: InnerProtoString) -> ProtoString { 220 Self { inner } 221 } 222 } 223 224 impl SealedInternal for ProtoString {} 225 226 impl AsRef<[u8]> for ProtoString { as_ref(&self) -> &[u8]227 fn as_ref(&self) -> &[u8] { 228 self.inner.as_bytes() 229 } 230 } 231 232 impl From<ProtoString> for ProtoBytes { from(v: ProtoString) -> Self233 fn from(v: ProtoString) -> Self { 234 ProtoBytes { inner: v.inner } 235 } 236 } 237 238 impl From<&str> for ProtoString { from(v: &str) -> Self239 fn from(v: &str) -> Self { 240 Self::from(v.as_bytes()) 241 } 242 } 243 244 impl From<&[u8]> for ProtoString { from(v: &[u8]) -> Self245 fn from(v: &[u8]) -> Self { 246 Self { inner: InnerProtoString::from(v) } 247 } 248 } 249 250 impl SealedInternal for &str {} 251 252 impl SealedInternal for &ProtoStr {} 253 254 impl IntoProxied<ProtoString> for &str { into_proxied(self, _private: Private) -> ProtoString255 fn into_proxied(self, _private: Private) -> ProtoString { 256 ProtoString::from(self) 257 } 258 } 259 260 impl IntoProxied<ProtoString> for &ProtoStr { into_proxied(self, _private: Private) -> ProtoString261 fn into_proxied(self, _private: Private) -> ProtoString { 262 ProtoString::from(self.as_bytes()) 263 } 264 } 265 266 impl IntoProxied<ProtoString> for String { into_proxied(self, _private: Private) -> ProtoString267 fn into_proxied(self, _private: Private) -> ProtoString { 268 ProtoString::from(self.as_str()) 269 } 270 } 271 272 impl IntoProxied<ProtoString> for &String { into_proxied(self, _private: Private) -> ProtoString273 fn into_proxied(self, _private: Private) -> ProtoString { 274 ProtoString::from(self.as_bytes()) 275 } 276 } 277 278 impl IntoProxied<ProtoString> for OsString { into_proxied(self, private: Private) -> ProtoString279 fn into_proxied(self, private: Private) -> ProtoString { 280 self.as_os_str().into_proxied(private) 281 } 282 } 283 284 impl IntoProxied<ProtoString> for &OsStr { into_proxied(self, _private: Private) -> ProtoString285 fn into_proxied(self, _private: Private) -> ProtoString { 286 ProtoString::from(self.as_encoded_bytes()) 287 } 288 } 289 290 impl IntoProxied<ProtoString> for Box<str> { into_proxied(self, _private: Private) -> ProtoString291 fn into_proxied(self, _private: Private) -> ProtoString { 292 ProtoString::from(AsRef::<str>::as_ref(&self)) 293 } 294 } 295 296 impl IntoProxied<ProtoString> for Cow<'_, str> { into_proxied(self, _private: Private) -> ProtoString297 fn into_proxied(self, _private: Private) -> ProtoString { 298 ProtoString::from(AsRef::<str>::as_ref(&self)) 299 } 300 } 301 302 impl IntoProxied<ProtoString> for Rc<str> { into_proxied(self, _private: Private) -> ProtoString303 fn into_proxied(self, _private: Private) -> ProtoString { 304 ProtoString::from(AsRef::<str>::as_ref(&self)) 305 } 306 } 307 308 impl IntoProxied<ProtoString> for Arc<str> { into_proxied(self, _private: Private) -> ProtoString309 fn into_proxied(self, _private: Private) -> ProtoString { 310 ProtoString::from(AsRef::<str>::as_ref(&self)) 311 } 312 } 313 314 /// A shared immutable view of a protobuf `string` field's contents. 315 /// 316 /// Like a `str`, it can be cheaply accessed as bytes and 317 /// is dynamically sized, requiring it be accessed through a pointer. 318 /// 319 /// # UTF-8 and `&str` access 320 /// 321 /// Protobuf [docs] state that a `string` field contains UTF-8 encoded text. 322 /// However, not every runtime enforces this, and the Rust runtime is designed 323 /// to integrate with other runtimes with FFI, like C++. 324 /// 325 /// Because of this, in order to access the contents as a `&str`, users must 326 /// call [`ProtoStr::to_str`] to perform a (possibly runtime-elided) UTF-8 327 /// validation check. However, the Rust API only allows `set()`ting a `string` 328 /// field with data should be valid UTF-8 like a `&str` or a 329 /// `&ProtoStr`. This means that this check should rarely fail, but is necessary 330 /// to prevent UB when interacting with C++, which has looser restrictions. 331 /// 332 /// Most of the time, users should not perform direct `&str` access to the 333 /// contents - this type implements `Display` and comparison with `str`, 334 /// so it's best to avoid a UTF-8 check by working directly with `&ProtoStr` 335 /// or converting to `&[u8]`. 336 /// 337 /// # `Display` and `ToString` 338 /// `ProtoStr` is ordinarily UTF-8 and so implements `Display`. If there are 339 /// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT 340 /// CHARACTER`]. Because anything implementing `Display` also implements 341 /// `ToString`, `proto_str.to_string()` is equivalent to 342 /// `String::from_utf8_lossy(proto_str.as_bytes()).into_owned()`. 343 /// 344 /// [docs]: https://protobuf.dev/programming-guides/proto2/#scalar 345 /// [dst]: https://doc.rust-lang.org/reference/dynamically-sized-types.html 346 /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER 347 #[repr(transparent)] 348 pub struct ProtoStr([u8]); 349 350 impl ProtoStr { 351 /// Converts `self` to a byte slice. 352 /// 353 /// Note: this type does not implement `Deref`; you must call `as_bytes()` 354 /// or `AsRef<[u8]>` to get access to bytes. as_bytes(&self) -> &[u8]355 pub fn as_bytes(&self) -> &[u8] { 356 &self.0 357 } 358 359 /// Yields a `&str` slice if `self` contains valid UTF-8. 360 /// 361 /// This may perform a runtime check, dependent on runtime. 362 /// 363 /// `String::from_utf8_lossy(proto_str.as_bytes())` can be used to 364 /// infallibly construct a string, replacing invalid UTF-8 with 365 /// [`U+FFFD REPLACEMENT CHARACTER`]. 366 /// 367 /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER 368 // This is not `try_to_str` since `to_str` is shorter, with `CStr` as precedent. to_str(&self) -> Result<&str, Utf8Error>369 pub fn to_str(&self) -> Result<&str, Utf8Error> { 370 Ok(std::str::from_utf8(&self.0)?) 371 } 372 373 /// Converts `self` to a string, including invalid characters. 374 /// 375 /// Invalid UTF-8 sequences are replaced with 376 /// [`U+FFFD REPLACEMENT CHARACTER`]. 377 /// 378 /// Users should be prefer this to `.to_string()` provided by `Display`. 379 /// `.to_cow_lossy()` is the same operation, but it may avoid an 380 /// allocation if the string is already UTF-8. 381 /// 382 /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER 383 // 384 // This method is named `to_string_lossy` in `CStr`, but since `to_string` 385 // also exists on this type, this name was chosen to avoid confusion. to_cow_lossy(&self) -> Cow<'_, str>386 pub fn to_cow_lossy(&self) -> Cow<'_, str> { 387 String::from_utf8_lossy(&self.0) 388 } 389 390 /// Returns `true` if `self` has a length of zero bytes. is_empty(&self) -> bool391 pub fn is_empty(&self) -> bool { 392 self.0.is_empty() 393 } 394 395 /// Returns the length of `self`. 396 /// 397 /// Like `&str`, this is a length in bytes, not `char`s or graphemes. len(&self) -> usize398 pub fn len(&self) -> usize { 399 self.0.len() 400 } 401 402 /// Iterates over the `char`s in this protobuf `string`. 403 /// 404 /// Invalid UTF-8 sequences are replaced with 405 /// [`U+FFFD REPLACEMENT CHARACTER`]. 406 /// 407 /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER chars(&self) -> impl Iterator<Item = char> + '_ + fmt::Debug408 pub fn chars(&self) -> impl Iterator<Item = char> + '_ + fmt::Debug { 409 Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| { 410 let mut yield_replacement_char = !chunk.invalid().is_empty(); 411 chunk.valid().chars().chain(iter::from_fn(move || { 412 // Yield a single replacement character for every 413 // non-empty invalid sequence. 414 yield_replacement_char.then(|| { 415 yield_replacement_char = false; 416 char::REPLACEMENT_CHARACTER 417 }) 418 })) 419 }) 420 } 421 422 /// Returns an iterator over chunks of UTF-8 data in the string. 423 /// 424 /// An `Ok(&str)` is yielded for every valid UTF-8 chunk, and an 425 /// `Err(&[u8])` for each non-UTF-8 chunk. An `Err` will be emitted 426 /// multiple times in a row for contiguous invalid chunks. Each invalid 427 /// chunk in an `Err` has a maximum length of 3 bytes. utf8_chunks(&self) -> impl Iterator<Item = Result<&str, &[u8]>> + '_428 pub fn utf8_chunks(&self) -> impl Iterator<Item = Result<&str, &[u8]>> + '_ { 429 Utf8Chunks::new(self.as_bytes()).flat_map(|chunk| { 430 let valid = chunk.valid(); 431 let invalid = chunk.invalid(); 432 (!valid.is_empty()) 433 .then_some(Ok(valid)) 434 .into_iter() 435 .chain((!invalid.is_empty()).then_some(Err(invalid))) 436 }) 437 } 438 439 /// Converts known-UTF-8 bytes to a `ProtoStr` without a check. 440 /// 441 /// # Safety 442 /// `bytes` must be valid UTF-8 if the current runtime requires it. from_utf8_unchecked(bytes: &[u8]) -> &Self443 pub unsafe fn from_utf8_unchecked(bytes: &[u8]) -> &Self { 444 // SAFETY: 445 // - `ProtoStr` is `#[repr(transparent)]` over `[u8]`, so it has the same 446 // layout. 447 // - `ProtoStr` has the same pointer metadata and element size as `[u8]`. 448 unsafe { &*(bytes as *const [u8] as *const Self) } 449 } 450 451 /// Interprets a string slice as a `&ProtoStr`. from_str(string: &str) -> &Self452 pub fn from_str(string: &str) -> &Self { 453 // SAFETY: `string.as_bytes()` is valid UTF-8. 454 unsafe { Self::from_utf8_unchecked(string.as_bytes()) } 455 } 456 } 457 458 impl AsRef<[u8]> for ProtoStr { as_ref(&self) -> &[u8]459 fn as_ref(&self) -> &[u8] { 460 self.as_bytes() 461 } 462 } 463 464 impl<'msg> From<&'msg ProtoStr> for &'msg [u8] { from(val: &'msg ProtoStr) -> &'msg [u8]465 fn from(val: &'msg ProtoStr) -> &'msg [u8] { 466 val.as_bytes() 467 } 468 } 469 470 impl<'msg> From<&'msg str> for &'msg ProtoStr { from(val: &'msg str) -> &'msg ProtoStr471 fn from(val: &'msg str) -> &'msg ProtoStr { 472 ProtoStr::from_str(val) 473 } 474 } 475 476 impl<'msg> TryFrom<&'msg ProtoStr> for &'msg str { 477 type Error = Utf8Error; 478 try_from(val: &'msg ProtoStr) -> Result<&'msg str, Utf8Error>479 fn try_from(val: &'msg ProtoStr) -> Result<&'msg str, Utf8Error> { 480 val.to_str() 481 } 482 } 483 484 impl<'msg> TryFrom<&'msg [u8]> for &'msg ProtoStr { 485 type Error = Utf8Error; 486 try_from(val: &'msg [u8]) -> Result<&'msg ProtoStr, Utf8Error>487 fn try_from(val: &'msg [u8]) -> Result<&'msg ProtoStr, Utf8Error> { 488 Ok(ProtoStr::from_str(std::str::from_utf8(val)?)) 489 } 490 } 491 492 impl fmt::Debug for ProtoStr { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result493 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 494 fmt::Debug::fmt(&Utf8Chunks::new(self.as_bytes()).debug(), f) 495 } 496 } 497 498 impl fmt::Display for ProtoStr { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result499 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 500 use std::fmt::Write as _; 501 for chunk in Utf8Chunks::new(self.as_bytes()) { 502 fmt::Display::fmt(chunk.valid(), f)?; 503 if !chunk.invalid().is_empty() { 504 // One invalid chunk is emitted per detected invalid sequence. 505 f.write_char(char::REPLACEMENT_CHARACTER)?; 506 } 507 } 508 Ok(()) 509 } 510 } 511 512 impl Hash for ProtoStr { hash<H: Hasher>(&self, state: &mut H)513 fn hash<H: Hasher>(&self, state: &mut H) { 514 self.as_bytes().hash(state) 515 } 516 } 517 518 impl Eq for ProtoStr {} 519 impl Ord for ProtoStr { cmp(&self, other: &ProtoStr) -> Ordering520 fn cmp(&self, other: &ProtoStr) -> Ordering { 521 self.as_bytes().cmp(other.as_bytes()) 522 } 523 } 524 525 impl Proxied for ProtoString { 526 type View<'msg> = &'msg ProtoStr; 527 } 528 529 impl AsView for ProtoString { 530 type Proxied = Self; 531 as_view(&self) -> &ProtoStr532 fn as_view(&self) -> &ProtoStr { 533 self.as_view() 534 } 535 } 536 537 impl<'msg> Proxy<'msg> for &'msg ProtoStr {} 538 539 impl AsView for &ProtoStr { 540 type Proxied = ProtoString; 541 as_view(&self) -> &ProtoStr542 fn as_view(&self) -> &ProtoStr { 543 self 544 } 545 } 546 547 impl<'msg> IntoView<'msg> for &'msg ProtoStr { into_view<'shorter>(self) -> &'shorter ProtoStr where 'msg: 'shorter,548 fn into_view<'shorter>(self) -> &'shorter ProtoStr 549 where 550 'msg: 'shorter, 551 { 552 self 553 } 554 } 555 556 impl<'msg> ViewProxy<'msg> for &'msg ProtoStr {} 557 558 /// Implements `PartialCmp` and `PartialEq` for the `lhs` against the `rhs` 559 /// using `AsRef<[u8]>`. 560 // TODO: consider improving to not require a `<()>` if no generics are 561 // needed 562 macro_rules! impl_bytes_partial_cmp { 563 ($(<($($generics:tt)*)> $lhs:ty => $rhs:ty),+ $(,)?) => { 564 $( 565 impl<$($generics)*> PartialEq<$rhs> for $lhs { 566 fn eq(&self, other: &$rhs) -> bool { 567 AsRef::<[u8]>::as_ref(self) == AsRef::<[u8]>::as_ref(other) 568 } 569 } 570 impl<$($generics)*> PartialOrd<$rhs> for $lhs { 571 fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> { 572 AsRef::<[u8]>::as_ref(self).partial_cmp(AsRef::<[u8]>::as_ref(other)) 573 } 574 } 575 )* 576 }; 577 } 578 579 impl_bytes_partial_cmp!( 580 // `ProtoStr` against protobuf types 581 <()> ProtoStr => ProtoStr, 582 583 // `ProtoStr` against foreign types 584 <()> ProtoStr => str, 585 <()> str => ProtoStr, 586 ); 587 588 impl std::fmt::Debug for ProtoString { fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error>589 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { 590 std::fmt::Debug::fmt(self.as_view(), f) 591 } 592 } 593 594 impl std::fmt::Debug for ProtoBytes { fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error>595 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { 596 std::fmt::Debug::fmt(self.as_view(), f) 597 } 598 } 599 600 unsafe impl Sync for ProtoString {} 601 unsafe impl Send for ProtoString {} 602 603 unsafe impl Send for ProtoBytes {} 604 unsafe impl Sync for ProtoBytes {} 605 606 #[cfg(test)] 607 mod tests { 608 use super::*; 609 use googletest::prelude::*; 610 611 // TODO: Add unit tests 612 613 // Shorter and safe utility function to construct `ProtoStr` from bytes for 614 // testing. test_proto_str(bytes: &[u8]) -> &ProtoStr615 fn test_proto_str(bytes: &[u8]) -> &ProtoStr { 616 // SAFETY: The runtime that this test executes under does not elide UTF-8 checks 617 // inside of `ProtoStr`. 618 unsafe { ProtoStr::from_utf8_unchecked(bytes) } 619 } 620 621 // UTF-8 test cases copied from: 622 // https://github.com/rust-lang/rust/blob/e8ee0b7/library/core/tests/str_lossy.rs 623 624 #[gtest] proto_str_debug()625 fn proto_str_debug() { 626 assert_eq!(&format!("{:?}", test_proto_str(b"Hello There")), "\"Hello There\""); 627 assert_eq!( 628 &format!( 629 "{:?}", 630 test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa"), 631 ), 632 "\"Hello\\xC0\\x80 There\\xE6\\x83 Goodbye\\u{10d4ea}\"", 633 ); 634 } 635 636 #[gtest] proto_str_display()637 fn proto_str_display() { 638 assert_eq!(&test_proto_str(b"Hello There").to_string(), "Hello There"); 639 assert_eq!( 640 &test_proto_str(b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa").to_string(), 641 "Hello�� There� Goodbye\u{10d4ea}", 642 ); 643 } 644 645 #[gtest] proto_str_to_rust_str()646 fn proto_str_to_rust_str() { 647 assert_eq!(test_proto_str(b"hello").to_str(), Ok("hello")); 648 assert_eq!(test_proto_str("ศไทย中华Việt Nam".as_bytes()).to_str(), Ok("ศไทย中华Việt Nam")); 649 for expect_fail in [ 650 &b"Hello\xC2 There\xFF Goodbye"[..], 651 b"Hello\xC0\x80 There\xE6\x83 Goodbye", 652 b"\xF5foo\xF5\x80bar", 653 b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", 654 b"\xF4foo\xF4\x80bar\xF4\xBFbaz", 655 b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar", 656 b"\xED\xA0\x80foo\xED\xBF\xBFbar", 657 ] { 658 assert!( 659 matches!(test_proto_str(expect_fail).to_str(), Err(Utf8Error { inner: _ })), 660 "{expect_fail:?}" 661 ); 662 } 663 } 664 665 #[gtest] proto_str_to_cow()666 fn proto_str_to_cow() { 667 assert_eq!(test_proto_str(b"hello").to_cow_lossy(), Cow::Borrowed("hello")); 668 assert_eq!( 669 test_proto_str("ศไทย中华Việt Nam".as_bytes()).to_cow_lossy(), 670 Cow::Borrowed("ศไทย中华Việt Nam") 671 ); 672 for (bytes, lossy_str) in [ 673 (&b"Hello\xC2 There\xFF Goodbye"[..], "Hello� There� Goodbye"), 674 (b"Hello\xC0\x80 There\xE6\x83 Goodbye", "Hello�� There� Goodbye"), 675 (b"\xF5foo\xF5\x80bar", "�foo��bar"), 676 (b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", "�foo�bar�baz"), 677 (b"\xF4foo\xF4\x80bar\xF4\xBFbaz", "�foo�bar��baz"), 678 (b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar", "����foo\u{10000}bar"), 679 (b"\xED\xA0\x80foo\xED\xBF\xBFbar", "���foo���bar"), 680 ] { 681 let cow = test_proto_str(bytes).to_cow_lossy(); 682 assert!(matches!(cow, Cow::Owned(_))); 683 assert_eq!(&*cow, lossy_str, "{bytes:?}"); 684 } 685 } 686 687 #[gtest] proto_str_utf8_chunks()688 fn proto_str_utf8_chunks() { 689 macro_rules! assert_chunks { 690 ($bytes:expr, $($chunks:expr),* $(,)?) => { 691 let bytes = $bytes; 692 let chunks: &[std::result::Result<&str, &[u8]>] = &[$($chunks),*]; 693 let s = test_proto_str(bytes); 694 let mut got_chunks = s.utf8_chunks(); 695 let mut expected_chars = chunks.iter().copied(); 696 assert!(got_chunks.eq(expected_chars), "{bytes:?} -> {chunks:?}"); 697 }; 698 } 699 assert_chunks!(b"hello", Ok("hello")); 700 assert_chunks!("ศไทย中华Việt Nam".as_bytes(), Ok("ศไทย中华Việt Nam")); 701 assert_chunks!( 702 b"Hello\xC2 There\xFF Goodbye", 703 Ok("Hello"), 704 Err(b"\xC2"), 705 Ok(" There"), 706 Err(b"\xFF"), 707 Ok(" Goodbye"), 708 ); 709 assert_chunks!( 710 b"Hello\xC0\x80 There\xE6\x83 Goodbye", 711 Ok("Hello"), 712 Err(b"\xC0"), 713 Err(b"\x80"), 714 Ok(" There"), 715 Err(b"\xE6\x83"), 716 Ok(" Goodbye"), 717 ); 718 assert_chunks!( 719 b"\xF5foo\xF5\x80bar", 720 Err(b"\xF5"), 721 Ok("foo"), 722 Err(b"\xF5"), 723 Err(b"\x80"), 724 Ok("bar"), 725 ); 726 assert_chunks!( 727 b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", 728 Err(b"\xF1"), 729 Ok("foo"), 730 Err(b"\xF1\x80"), 731 Ok("bar"), 732 Err(b"\xF1\x80\x80"), 733 Ok("baz"), 734 ); 735 assert_chunks!( 736 b"\xF4foo\xF4\x80bar\xF4\xBFbaz", 737 Err(b"\xF4"), 738 Ok("foo"), 739 Err(b"\xF4\x80"), 740 Ok("bar"), 741 Err(b"\xF4"), 742 Err(b"\xBF"), 743 Ok("baz"), 744 ); 745 assert_chunks!( 746 b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar", 747 Err(b"\xF0"), 748 Err(b"\x80"), 749 Err(b"\x80"), 750 Err(b"\x80"), 751 Ok("foo\u{10000}bar"), 752 ); 753 assert_chunks!( 754 b"\xED\xA0\x80foo\xED\xBF\xBFbar", 755 Err(b"\xED"), 756 Err(b"\xA0"), 757 Err(b"\x80"), 758 Ok("foo"), 759 Err(b"\xED"), 760 Err(b"\xBF"), 761 Err(b"\xBF"), 762 Ok("bar"), 763 ); 764 } 765 766 #[gtest] proto_str_chars()767 fn proto_str_chars() { 768 macro_rules! assert_chars { 769 ($bytes:expr, $chars:expr) => { 770 let bytes = $bytes; 771 let chars = $chars; 772 let s = test_proto_str(bytes); 773 let mut got_chars = s.chars(); 774 let mut expected_chars = chars.into_iter(); 775 assert!(got_chars.eq(expected_chars), "{bytes:?} -> {chars:?}"); 776 }; 777 } 778 assert_chars!(b"hello", ['h', 'e', 'l', 'l', 'o']); 779 assert_chars!( 780 "ศไทย中华Việt Nam".as_bytes(), 781 ['ศ', 'ไ', 'ท', 'ย', '中', '华', 'V', 'i', 'ệ', 't', ' ', 'N', 'a', 'm'] 782 ); 783 assert_chars!( 784 b"Hello\xC2 There\xFF Goodbye", 785 [ 786 'H', 'e', 'l', 'l', 'o', '�', ' ', 'T', 'h', 'e', 'r', 'e', '�', ' ', 'G', 'o', 787 'o', 'd', 'b', 'y', 'e' 788 ] 789 ); 790 assert_chars!( 791 b"Hello\xC0\x80 There\xE6\x83 Goodbye", 792 [ 793 'H', 'e', 'l', 'l', 'o', '�', '�', ' ', 'T', 'h', 'e', 'r', 'e', '�', ' ', 'G', 794 'o', 'o', 'd', 'b', 'y', 'e' 795 ] 796 ); 797 assert_chars!(b"\xF5foo\xF5\x80bar", ['�', 'f', 'o', 'o', '�', '�', 'b', 'a', 'r']); 798 assert_chars!( 799 b"\xF1foo\xF1\x80bar\xF1\x80\x80baz", 800 ['�', 'f', 'o', 'o', '�', 'b', 'a', 'r', '�', 'b', 'a', 'z'] 801 ); 802 assert_chars!( 803 b"\xF4foo\xF4\x80bar\xF4\xBFbaz", 804 ['�', 'f', 'o', 'o', '�', 'b', 'a', 'r', '�', '�', 'b', 'a', 'z'] 805 ); 806 assert_chars!( 807 b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar", 808 ['�', '�', '�', '�', 'f', 'o', 'o', '\u{10000}', 'b', 'a', 'r'] 809 ); 810 assert_chars!( 811 b"\xED\xA0\x80foo\xED\xBF\xBFbar", 812 ['�', '�', '�', 'f', 'o', 'o', '�', '�', '�', 'b', 'a', 'r'] 813 ); 814 } 815 } 816