• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2024 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 //! Tests demonstrating the Protobuf Rust behavior around UTF-8 under different
9 //! scenarios (proto2, proto3, editions).
10 
11 // TODO: The behavior is currently suboptimal (for example because
12 // b/333545903 or b/335140403). Design and implement desirable changes to this
13 // behavior. Do not assume that the Protobuf team is intentional about these
14 // behaviors while b/304774814 is open.
15 
16 use googletest::prelude::*;
17 use protobuf::prelude::*;
18 
19 use feature_verify_rust_proto::Verify;
20 use no_features_proto2_rust_proto::NoFeaturesProto2;
21 use no_features_proto3_rust_proto::NoFeaturesProto3;
22 use protobuf::{ParseError, ProtoStr};
23 
24 // We use 0b1000_0000, since 0b1XXX_XXXX in UTF-8 denotes a byte 2-4, but never
25 // the first byte.
26 const NON_UTF8_BYTES: &[u8] = b"\x80";
27 
28 // Returns ProtoStr with non-UTF-8 content.
make_non_utf8_proto_str() -> &'static ProtoStr29 fn make_non_utf8_proto_str() -> &'static ProtoStr {
30     unsafe {
31         // SAFETY: This is safe under current implementation of C++ and UPB kernels.
32         // In the hypothethical pure Rust runtime this would be library-level UB - but
33         // this test is specifically present to demonstrate UTF-8 behavior under
34         // C++ and UPB kernels.
35         ProtoStr::from_utf8_unchecked(NON_UTF8_BYTES)
36     }
37 }
38 
39 #[gtest]
test_proto2()40 fn test_proto2() {
41     let non_utf8_str = make_non_utf8_proto_str();
42 
43     let mut msg = NoFeaturesProto2::new();
44 
45     // No error on setter
46     msg.set_my_field(non_utf8_str);
47     assert_that!(msg.my_field().as_bytes(), eq(NON_UTF8_BYTES));
48 
49     // No error on serialization
50     let serialized_nonutf8 = msg.serialize().expect("serialization should not fail");
51 
52     // No error on parsing.
53     let parsed_result = NoFeaturesProto2::parse(&serialized_nonutf8);
54     assert_that!(parsed_result, ok(anything()));
55 }
56 
57 #[gtest]
test_proto3()58 fn test_proto3() {
59     let non_utf8_str = make_non_utf8_proto_str();
60 
61     let mut msg = NoFeaturesProto3::new();
62 
63     // No error on setter
64     msg.set_my_field(non_utf8_str);
65     assert_that!(msg.my_field().as_bytes(), eq(NON_UTF8_BYTES));
66 
67     // No error on serialization
68     let serialized_nonutf8 = msg.serialize().expect("serialization should not fail");
69 
70     // Error on parsing.
71     let parsed_result = NoFeaturesProto3::parse(&serialized_nonutf8);
72     assert_that!(parsed_result, err(matches_pattern!(&ParseError)));
73 }
74 
75 #[gtest]
test_verify()76 fn test_verify() {
77     let non_utf8_str = make_non_utf8_proto_str();
78 
79     let mut msg = Verify::new();
80 
81     // No error on setter
82     msg.set_my_field(non_utf8_str);
83     assert_that!(msg.my_field().as_bytes(), eq(NON_UTF8_BYTES));
84 
85     // No error on serialization
86     let serialized_nonutf8 = msg.serialize().expect("serialization should not fail");
87 
88     // Error on parsing.
89     let parsed_result = Verify::parse(&serialized_nonutf8);
90     assert_that!(parsed_result, err(matches_pattern!(&ParseError)));
91 }
92