1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include <stdlib.h>
9
10 #include <gtest/gtest.h>
11 #include "upb/base/string_view.h"
12 #include "upb/base/upcast.h"
13 #include "upb/mem/arena.h"
14 #include "upb/mem/arena.hpp"
15 #include "upb/message/utf8_test.upb.h"
16 #include "upb/message/utf8_test.upb_minitable.h"
17 #include "upb/message/utf8_test_proto2.upb.h"
18 #include "upb/message/utf8_test_proto2.upb_minitable.h"
19 #include "upb/wire/decode.h"
20
21 namespace {
22
// A single 0xFF byte followed by the implicit NUL terminator; used by the
// tests below as a string payload that is not valid UTF-8.
constexpr char bad_utf8[] = "\xff";
24
GetBadUtf8Payload(upb_Arena * arena,size_t * size)25 static char* GetBadUtf8Payload(upb_Arena* arena, size_t* size) {
26 upb_test_TestUtf8Bytes* msg = upb_test_TestUtf8Bytes_new(arena);
27 upb_test_TestUtf8Bytes_set_data(msg, upb_StringView_FromString(bad_utf8));
28 char* data = upb_test_TestUtf8Bytes_serialize(msg, arena, size);
29 EXPECT_TRUE(data != nullptr);
30 return data;
31 }
32
// Parsing the bad-UTF-8 payload back into the bytes-typed message it was
// serialized from must succeed.
TEST(Utf8Test, BytesFieldDoesntValidate) {
  upb::Arena arena;
  upb_Arena* const raw_arena = arena.ptr();

  size_t payload_size;
  char* data = GetBadUtf8Payload(raw_arena, &payload_size);

  upb_test_TestUtf8Bytes* msg2 =
      upb_test_TestUtf8Bytes_parse(data, payload_size, raw_arena);

  // A bytes field is not subject to UTF-8 validation, so the parse yields a
  // message.
  ASSERT_TRUE(msg2 != nullptr);
}
43
// Decoding the bad-UTF-8 payload into a proto3 message with a string field
// must be rejected with kUpb_DecodeStatus_BadUtf8.
TEST(Utf8Test, Proto3FieldValidates) {
  upb::Arena arena;
  upb_Arena* const raw_arena = arena.ptr();

  size_t payload_size;
  char* const payload = GetBadUtf8Payload(raw_arena, &payload_size);

  auto* const target = upb_test_TestUtf8Proto3String_new(raw_arena);

  // Default decode options (0): proto3 string fields validate UTF-8 on their
  // own, so the decode is expected to fail.
  upb_DecodeStatus status;
  status = upb_Decode(payload, payload_size, UPB_UPCAST(target),
                      &upb_0test__TestUtf8Proto3String_msg_init, nullptr, 0,
                      raw_arena);

  ASSERT_EQ(kUpb_DecodeStatus_BadUtf8, status);
}
59
// Same as Proto3FieldValidates, but the destination message declares the
// string field as repeated.
TEST(Utf8Test, RepeatedProto3FieldValidates) {
  upb::Arena arena;
  upb_Arena* const raw_arena = arena.ptr();

  size_t payload_size;
  char* const payload = GetBadUtf8Payload(raw_arena, &payload_size);

  auto* const target = upb_test_TestUtf8RepeatedProto3String_new(raw_arena);

  // Default decode options (0): proto3 string fields validate UTF-8, so the
  // decode is expected to fail.
  upb_DecodeStatus status;
  status = upb_Decode(payload, payload_size, UPB_UPCAST(target),
                      &upb_0test__TestUtf8RepeatedProto3String_msg_init,
                      nullptr, 0, raw_arena);

  ASSERT_EQ(kUpb_DecodeStatus_BadUtf8, status);
}
76
// Decoding the bad-UTF-8 payload into a proto2 message with a bytes field
// must succeed.
TEST(Utf8Test, Proto2BytesValidates) {
  upb::Arena arena;
  upb_Arena* const raw_arena = arena.ptr();

  size_t payload_size;
  char* const payload = GetBadUtf8Payload(raw_arena, &payload_size);

  auto* const target = upb_test_TestUtf8Proto2Bytes_new(raw_arena);

  // Default decode options (0): proto2 bytes fields are not UTF-8 validated,
  // so the decode is expected to succeed.
  const upb_DecodeStatus status = upb_Decode(
      payload, payload_size, UPB_UPCAST(target),
      &upb_0test__TestUtf8Proto2Bytes_msg_init, nullptr, 0, raw_arena);

  ASSERT_EQ(kUpb_DecodeStatus_Ok, status);
}
93
// Same as Proto2BytesValidates, but the destination message declares the
// bytes field as repeated.
TEST(Utf8Test, Proto2RepeatedBytesValidates) {
  upb::Arena arena;
  upb_Arena* const raw_arena = arena.ptr();

  size_t payload_size;
  char* const payload = GetBadUtf8Payload(raw_arena, &payload_size);

  auto* const target = upb_test_TestUtf8RepeatedProto2Bytes_new(raw_arena);

  // Default decode options (0): proto2 bytes fields are not UTF-8 validated,
  // so the decode is expected to succeed.
  const upb_DecodeStatus status = upb_Decode(
      payload, payload_size, UPB_UPCAST(target),
      &upb_0test__TestUtf8RepeatedProto2Bytes_msg_init, nullptr, 0, raw_arena);

  ASSERT_EQ(kUpb_DecodeStatus_Ok, status);
}
110
// Decoding the bad-UTF-8 payload into a proto2 message with a string field,
// using default options, must succeed.
TEST(Utf8Test, Proto2StringValidates) {
  upb::Arena arena;
  upb_Arena* const raw_arena = arena.ptr();

  size_t payload_size;
  char* const payload = GetBadUtf8Payload(raw_arena, &payload_size);

  auto* const target = upb_test_TestUtf8Proto2String_new(raw_arena);

  // Default decode options (0): proto2 string fields are not UTF-8 validated
  // unless validation is explicitly requested, so the decode succeeds.
  const upb_DecodeStatus status = upb_Decode(
      payload, payload_size, UPB_UPCAST(target),
      &upb_0test__TestUtf8Proto2String_msg_init, nullptr, 0, raw_arena);

  ASSERT_EQ(kUpb_DecodeStatus_Ok, status);
}
127
// Verifies that kUpb_DecodeOption_AlwaysValidateUtf8 turns on UTF-8
// validation for a proto2 string field. The same payload is decoded twice
// into the same message: first with default options as a baseline, then with
// forced validation.
TEST(Utf8Test, Proto2FieldFailsValidation) {
  upb::Arena arena;
  size_t size;
  char* data = GetBadUtf8Payload(arena.ptr(), &size);

  upb_test_TestUtf8Proto2String* msg =
      upb_test_TestUtf8Proto2String_new(arena.ptr());

  // Baseline: with default options, proto2 string fields don't validate
  // UTF-8, so the decode succeeds. Checking this result (rather than
  // discarding it) shows that the failure below is caused by the option and
  // not by a malformed payload.
  upb_DecodeStatus status = upb_Decode(
      data, size, UPB_UPCAST(msg), &upb_0test__TestUtf8Proto2String_msg_init,
      nullptr, 0, arena.ptr());
  ASSERT_EQ(kUpb_DecodeStatus_Ok, status);

  // Parse fails, because we pass in kUpb_DecodeOption_AlwaysValidateUtf8 to
  // force validation of proto2 string fields.
  status = upb_Decode(data, size, UPB_UPCAST(msg),
                      &upb_0test__TestUtf8Proto2String_msg_init, nullptr,
                      kUpb_DecodeOption_AlwaysValidateUtf8, arena.ptr());
  ASSERT_EQ(kUpb_DecodeStatus_BadUtf8, status);
}
148
// Verifies that kUpb_DecodeOption_AlwaysValidateUtf8 turns on UTF-8
// validation for a repeated proto2 string field. The same payload is decoded
// twice into the same message: first with default options as a baseline,
// then with forced validation.
TEST(Utf8Test, Proto2RepeatedFieldFailsValidation) {
  upb::Arena arena;
  size_t size;
  char* data = GetBadUtf8Payload(arena.ptr(), &size);

  upb_test_TestUtf8RepeatedProto2String* msg =
      upb_test_TestUtf8RepeatedProto2String_new(arena.ptr());

  // Baseline: with default options, proto2 string fields don't validate
  // UTF-8, so the decode succeeds. Checking this result (rather than
  // discarding it) shows that the failure below is caused by the option and
  // not by a malformed payload.
  upb_DecodeStatus status =
      upb_Decode(data, size, UPB_UPCAST(msg),
                 &upb_0test__TestUtf8RepeatedProto2String_msg_init, nullptr, 0,
                 arena.ptr());
  ASSERT_EQ(kUpb_DecodeStatus_Ok, status);

  // Parse fails, because we pass in kUpb_DecodeOption_AlwaysValidateUtf8 to
  // force validation of proto2 string fields.
  status =
      upb_Decode(data, size, UPB_UPCAST(msg),
                 &upb_0test__TestUtf8RepeatedProto2String_msg_init, nullptr,
                 kUpb_DecodeOption_AlwaysValidateUtf8, arena.ptr());
  ASSERT_EQ(kUpb_DecodeStatus_BadUtf8, status);
}
170
171 } // namespace
172