• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2023 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include <stdlib.h>
9 
10 #include <gtest/gtest.h>
11 #include "upb/base/string_view.h"
12 #include "upb/base/upcast.h"
13 #include "upb/mem/arena.h"
14 #include "upb/mem/arena.hpp"
15 #include "upb/message/utf8_test.upb.h"
16 #include "upb/message/utf8_test.upb_minitable.h"
17 #include "upb/message/utf8_test_proto2.upb.h"
18 #include "upb/message/utf8_test_proto2.upb_minitable.h"
19 #include "upb/wire/decode.h"
20 
21 namespace {
22 
23 const char bad_utf8[] = "\xff";
24 
GetBadUtf8Payload(upb_Arena * arena,size_t * size)25 static char* GetBadUtf8Payload(upb_Arena* arena, size_t* size) {
26   upb_test_TestUtf8Bytes* msg = upb_test_TestUtf8Bytes_new(arena);
27   upb_test_TestUtf8Bytes_set_data(msg, upb_StringView_FromString(bad_utf8));
28   char* data = upb_test_TestUtf8Bytes_serialize(msg, arena, size);
29   EXPECT_TRUE(data != nullptr);
30   return data;
31 }
32 
// Parsing the invalid-UTF-8 payload back into the message type it was
// serialized from is expected to yield a message.
TEST(Utf8Test, BytesFieldDoesntValidate) {
  upb::Arena arena;
  size_t payload_size;
  char* const payload = GetBadUtf8Payload(arena.ptr(), &payload_size);

  // The bytes field does not validate UTF-8, so the parse must succeed.
  const upb_test_TestUtf8Bytes* const parsed =
      upb_test_TestUtf8Bytes_parse(payload, payload_size, arena.ptr());
  ASSERT_TRUE(parsed != nullptr);
}
43 
// Decoding the invalid-UTF-8 payload into a proto3 string message must be
// rejected with kUpb_DecodeStatus_BadUtf8.
TEST(Utf8Test, Proto3FieldValidates) {
  upb::Arena arena;
  size_t payload_size;
  char* const payload = GetBadUtf8Payload(arena.ptr(), &payload_size);

  auto* const target = upb_test_TestUtf8Proto3String_new(arena.ptr());
  const upb_DecodeStatus result =
      upb_Decode(payload, payload_size, UPB_UPCAST(target),
                 &upb_0test__TestUtf8Proto3String_msg_init, nullptr, 0,
                 arena.ptr());

  // proto3 string fields validate UTF-8, so the decode reports BadUtf8.
  ASSERT_EQ(kUpb_DecodeStatus_BadUtf8, result);
}
59 
// Same as Proto3FieldValidates, but the destination message type is
// TestUtf8RepeatedProto3String.
TEST(Utf8Test, RepeatedProto3FieldValidates) {
  upb::Arena arena;
  size_t payload_size;
  char* const payload = GetBadUtf8Payload(arena.ptr(), &payload_size);

  auto* const target = upb_test_TestUtf8RepeatedProto3String_new(arena.ptr());
  const upb_DecodeStatus result = upb_Decode(
      payload, payload_size, UPB_UPCAST(target),
      &upb_0test__TestUtf8RepeatedProto3String_msg_init, nullptr, 0,
      arena.ptr());

  // proto3 string fields validate UTF-8, so the decode reports BadUtf8.
  ASSERT_EQ(kUpb_DecodeStatus_BadUtf8, result);
}
76 
// Decoding the invalid-UTF-8 payload into a proto2 bytes message with default
// options must succeed.
TEST(Utf8Test, Proto2BytesValidates) {
  upb::Arena arena;
  size_t payload_size;
  char* const payload = GetBadUtf8Payload(arena.ptr(), &payload_size);

  auto* const target = upb_test_TestUtf8Proto2Bytes_new(arena.ptr());
  const upb_DecodeStatus result =
      upb_Decode(payload, payload_size, UPB_UPCAST(target),
                 &upb_0test__TestUtf8Proto2Bytes_msg_init, nullptr, 0,
                 arena.ptr());

  // proto2 bytes fields don't validate UTF-8, so the decode is accepted.
  ASSERT_EQ(kUpb_DecodeStatus_Ok, result);
}
93 
// Same as Proto2BytesValidates, but the destination message type is
// TestUtf8RepeatedProto2Bytes.
TEST(Utf8Test, Proto2RepeatedBytesValidates) {
  upb::Arena arena;
  size_t payload_size;
  char* const payload = GetBadUtf8Payload(arena.ptr(), &payload_size);

  auto* const target = upb_test_TestUtf8RepeatedProto2Bytes_new(arena.ptr());
  const upb_DecodeStatus result = upb_Decode(
      payload, payload_size, UPB_UPCAST(target),
      &upb_0test__TestUtf8RepeatedProto2Bytes_msg_init, nullptr, 0,
      arena.ptr());

  // proto2 bytes fields don't validate UTF-8, so the decode is accepted.
  ASSERT_EQ(kUpb_DecodeStatus_Ok, result);
}
110 
// Decoding the invalid-UTF-8 payload into a proto2 string message with
// default options (0) must succeed.
TEST(Utf8Test, Proto2StringValidates) {
  upb::Arena arena;
  size_t payload_size;
  char* const payload = GetBadUtf8Payload(arena.ptr(), &payload_size);

  auto* const target = upb_test_TestUtf8Proto2String_new(arena.ptr());
  const upb_DecodeStatus result =
      upb_Decode(payload, payload_size, UPB_UPCAST(target),
                 &upb_0test__TestUtf8Proto2String_msg_init, nullptr, 0,
                 arena.ptr());

  // proto2 string fields don't validate UTF-8, so the decode is accepted.
  ASSERT_EQ(kUpb_DecodeStatus_Ok, result);
}
127 
// Verifies that kUpb_DecodeOption_AlwaysValidateUtf8 turns an otherwise
// accepted decode of invalid UTF-8 into a proto2 string field into a
// kUpb_DecodeStatus_BadUtf8 failure.
TEST(Utf8Test, Proto2FieldFailsValidation) {
  upb::Arena arena;
  size_t size = 0;
  char* data = GetBadUtf8Payload(arena.ptr(), &size);
  // Stop here if serialization failed; decoding a null buffer would only
  // obscure the real problem.
  ASSERT_TRUE(data != nullptr);

  upb_test_TestUtf8Proto2String* msg =
      upb_test_TestUtf8Proto2String_new(arena.ptr());

  // Baseline: with default options, proto2 string fields don't validate
  // UTF-8, so the same payload decodes successfully. Checking this result
  // (rather than discarding it) proves the failure below is caused by the
  // decode option and not by the payload being undecodable.
  upb_DecodeStatus status =
      upb_Decode(data, size, UPB_UPCAST(msg),
                 &upb_0test__TestUtf8Proto2String_msg_init, nullptr, 0,
                 arena.ptr());
  EXPECT_EQ(kUpb_DecodeStatus_Ok, status);

  // Parse fails, because we pass in kUpb_DecodeOption_AlwaysValidateUtf8 to
  // force validation of proto2 string fields.
  status = upb_Decode(data, size, UPB_UPCAST(msg),
                      &upb_0test__TestUtf8Proto2String_msg_init, nullptr,
                      kUpb_DecodeOption_AlwaysValidateUtf8, arena.ptr());
  ASSERT_EQ(kUpb_DecodeStatus_BadUtf8, status);
}
148 
// Verifies that kUpb_DecodeOption_AlwaysValidateUtf8 turns an otherwise
// accepted decode of invalid UTF-8 into a TestUtf8RepeatedProto2String
// message into a kUpb_DecodeStatus_BadUtf8 failure.
TEST(Utf8Test, Proto2RepeatedFieldFailsValidation) {
  upb::Arena arena;
  size_t size = 0;
  char* data = GetBadUtf8Payload(arena.ptr(), &size);
  // Stop here if serialization failed; decoding a null buffer would only
  // obscure the real problem.
  ASSERT_TRUE(data != nullptr);

  upb_test_TestUtf8RepeatedProto2String* msg =
      upb_test_TestUtf8RepeatedProto2String_new(arena.ptr());

  // Baseline: with default options, proto2 string fields are expected to
  // decode without UTF-8 validation. Checking this result (rather than
  // discarding it) proves the failure below is caused by the decode option.
  upb_DecodeStatus status =
      upb_Decode(data, size, UPB_UPCAST(msg),
                 &upb_0test__TestUtf8RepeatedProto2String_msg_init, nullptr, 0,
                 arena.ptr());
  EXPECT_EQ(kUpb_DecodeStatus_Ok, status);

  // Parse fails, because we pass in kUpb_DecodeOption_AlwaysValidateUtf8 to
  // force validation of proto2 string fields.
  status =
      upb_Decode(data, size, UPB_UPCAST(msg),
                 &upb_0test__TestUtf8RepeatedProto2String_msg_init, nullptr,
                 kUpb_DecodeOption_AlwaysValidateUtf8, arena.ptr());
  ASSERT_EQ(kUpb_DecodeStatus_BadUtf8, status);
}
170 
171 }  // namespace
172