1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 #include "google/protobuf/compiler/retention.h"
9
10 #include <algorithm>
11 #include <memory>
12 #include <string>
13 #include <utility>
14 #include <vector>
15
16 #include "absl/container/flat_hash_set.h"
17 #include "absl/strings/match.h"
18 #include "absl/types/span.h"
19 #include "google/protobuf/descriptor.h"
20 #include "google/protobuf/dynamic_message.h"
21
22 namespace google {
23 namespace protobuf {
24 namespace compiler {
25
26 namespace {
27
IsOptionsProto(const Message & m)28 bool IsOptionsProto(const Message& m) {
29 const Descriptor* descriptor = m.GetDescriptor();
30 return descriptor->file()->name() ==
31 DescriptorProto::descriptor()->file()->name() &&
32 absl::EndsWith(descriptor->name(), "Options");
33 }
34
IsEmpty(const Message & m)35 bool IsEmpty(const Message& m) { return m.ByteSizeLong() == 0; }
36
37 // Recursively strips any options with source retention from the message. If
38 // stripped_paths is not null, then this function will populate it with the
39 // paths that were stripped, using the path format from
40 // SourceCodeInfo.Location. The path parameter is used as a stack tracking the
41 // path to the current location.
StripMessage(Message & m,std::vector<int> & path,std::vector<std::vector<int>> * stripped_paths)42 void StripMessage(Message& m, std::vector<int>& path,
43 std::vector<std::vector<int>>* stripped_paths) {
44 const Reflection* reflection = m.GetReflection();
45 std::vector<const FieldDescriptor*> fields;
46 reflection->ListFields(m, &fields);
47 for (const FieldDescriptor* field : fields) {
48 path.push_back(field->number());
49 if (field->options().retention() == FieldOptions::RETENTION_SOURCE) {
50 reflection->ClearField(&m, field);
51 if (stripped_paths != nullptr) {
52 stripped_paths->push_back(path);
53 }
54 } else if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
55 if (field->is_repeated()) {
56 int field_size = reflection->FieldSize(m, field);
57 for (int i = 0; i < field_size; ++i) {
58 path.push_back(i);
59 StripMessage(*reflection->MutableRepeatedMessage(&m, field, i), path,
60 stripped_paths);
61 path.pop_back();
62 }
63 } else {
64 Message* child = reflection->MutableMessage(&m, field);
65 bool was_nonempty_options_proto =
66 IsOptionsProto(*child) && !IsEmpty(*child);
67 StripMessage(*child, path, stripped_paths);
68 // If this is an options message that became empty due to retention
69 // stripping, remove it.
70 if (was_nonempty_options_proto && IsEmpty(*child)) {
71 reflection->ClearField(&m, field);
72 if (stripped_paths != nullptr) {
73 stripped_paths->push_back(path);
74 }
75 }
76 }
77 }
78 path.pop_back();
79 }
80 }
81
82 // Converts the descriptor to a dynamic message if necessary, and then strips
83 // out all source-retention options.
84 //
85 // The options message may have custom options set on it, and these would
86 // ordinarily appear as unknown fields since they are not linked into protoc.
87 // Using a dynamic message allows us to see these custom options. To convert
88 // back and forth between the generated type and the dynamic message, we have
89 // to serialize one and parse that into the other.
90 //
91 // If stripped_paths is not null, it will be populated with the paths that were
92 // stripped, using the path format from SourceCodeInfo.Location.
ConvertToDynamicMessageAndStripOptions(Message & m,const DescriptorPool & pool,std::vector<std::vector<int>> * stripped_paths=nullptr)93 void ConvertToDynamicMessageAndStripOptions(
94 Message& m, const DescriptorPool& pool,
95 std::vector<std::vector<int>>* stripped_paths = nullptr) {
96 // We need to look up the descriptor in the pool so that we can get a
97 // descriptor which knows about any custom options that were used in the
98 // .proto file.
99 const Descriptor* descriptor = pool.FindMessageTypeByName(m.GetTypeName());
100 std::vector<int> path;
101
102 if (descriptor == nullptr || &pool == DescriptorPool::generated_pool()) {
103 // If the pool does not contain the descriptor, then this proto file does
104 // not transitively depend on descriptor.proto, in which case we know there
105 // are no custom options to worry about. If we are working with the
106 // generated pool, then we can still access any custom options without
107 // having to resort to DynamicMessage.
108 StripMessage(m, path, stripped_paths);
109 } else {
110 // To convert to a dynamic message, we need to serialize the original
111 // descriptor and parse it back again. This can fail if the descriptor is
112 // invalid, so in that case we try to handle it gracefully by stripping the
113 // original descriptor without using DynamicMessage. In this situation we
114 // will generally not be able to strip custom options, but we can at least
115 // strip built-in options.
116 DynamicMessageFactory factory;
117 std::unique_ptr<Message> dynamic_message(
118 factory.GetPrototype(descriptor)->New());
119 std::string serialized;
120 if (!m.SerializePartialToString(&serialized)) {
121 ABSL_LOG_EVERY_N_SEC(ERROR, 1)
122 << "Failed to fully strip source-retention options";
123 StripMessage(m, path, stripped_paths);
124 return;
125 }
126 if (!dynamic_message->ParsePartialFromString(serialized)) {
127 ABSL_LOG_EVERY_N_SEC(ERROR, 1)
128 << "Failed to fully strip source-retention options";
129 StripMessage(m, path, stripped_paths);
130 return;
131 }
132 StripMessage(*dynamic_message, path, stripped_paths);
133 if (!dynamic_message->SerializePartialToString(&serialized)) {
134 ABSL_LOG_EVERY_N_SEC(ERROR, 1)
135 << "Failed to fully strip source-retention options";
136 StripMessage(m, path, stripped_paths);
137 return;
138 }
139 if (!m.ParsePartialFromString(serialized)) {
140 ABSL_LOG_EVERY_N_SEC(ERROR, 1)
141 << "Failed to fully strip source-retention options";
142 StripMessage(m, path, stripped_paths);
143 return;
144 }
145 }
146 }
147
148 // Returns a const reference to the descriptor pool associated with the given
149 // descriptor.
150 template <typename DescriptorType>
GetPool(const DescriptorType & descriptor)151 const google::protobuf::DescriptorPool& GetPool(const DescriptorType& descriptor) {
152 return *descriptor.file()->pool();
153 }
154
155 // Specialization for FileDescriptor.
GetPool(const FileDescriptor & descriptor)156 const google::protobuf::DescriptorPool& GetPool(const FileDescriptor& descriptor) {
157 return *descriptor.pool();
158 }
159
160 // Returns the options associated with the given descriptor, with all
161 // source-retention options stripped out.
162 template <typename DescriptorType>
StripLocalOptions(const DescriptorType & descriptor)163 auto StripLocalOptions(const DescriptorType& descriptor) {
164 auto options = descriptor.options();
165 ConvertToDynamicMessageAndStripOptions(options, GetPool(descriptor));
166 return options;
167 }
168
169 // Returns true if x is a prefix of y.
IsPrefix(absl::Span<const int> x,absl::Span<const int> y)170 bool IsPrefix(absl::Span<const int> x, absl::Span<const int> y) {
171 return x == y.subspan(0, x.size());
172 }
173
174 // Strips the paths in stripped_paths from the SourceCodeInfo.
StripSourceCodeInfo(std::vector<std::vector<int>> & stripped_paths,SourceCodeInfo & source_code_info)175 void StripSourceCodeInfo(std::vector<std::vector<int>>& stripped_paths,
176 SourceCodeInfo& source_code_info) {
177 RepeatedPtrField<SourceCodeInfo::Location>* locations =
178 source_code_info.mutable_location();
179
180 // We sort the locations lexicographically by their paths and include an
181 // index pointing back to the original location.
182 std::vector<std::pair<absl::Span<const int>, int>> sorted_locations;
183 sorted_locations.reserve(locations->size());
184 for (int i = 0; i < locations->size(); ++i) {
185 sorted_locations.emplace_back((*locations)[i].path(), i);
186 }
187 absl::c_sort(sorted_locations);
188 absl::c_sort(stripped_paths);
189
190 // With both arrays sorted, we can efficiently step through them in tandem.
191 // If a stripped path is a prefix of any location, then that is a location
192 // we need to delete from the SourceCodeInfo.
193 absl::flat_hash_set<int> indices_to_delete;
194 auto i = stripped_paths.cbegin();
195 auto j = sorted_locations.cbegin();
196 while (i != stripped_paths.cend() && j != sorted_locations.cend()) {
197 if (IsPrefix(*i, j->first)) {
198 indices_to_delete.insert(j->second);
199 ++j;
200 } else if (*i < j->first) {
201 ++i;
202 } else {
203 ++j;
204 }
205 }
206
207 // We delete the locations in descending order to avoid invalidating
208 // indices.
209 std::vector<SourceCodeInfo::Location*> old_locations;
210 old_locations.resize(locations->size());
211 locations->ExtractSubrange(0, locations->size(), old_locations.data());
212 locations->Reserve(old_locations.size() - indices_to_delete.size());
213 for (size_t i = 0; i < old_locations.size(); ++i) {
214 if (indices_to_delete.contains(i)) {
215 delete old_locations[i];
216 } else {
217 locations->AddAllocated(old_locations[i]);
218 }
219 }
220 }
221
222 } // namespace
223
StripSourceRetentionOptions(const FileDescriptor & file,bool include_source_code_info)224 FileDescriptorProto StripSourceRetentionOptions(const FileDescriptor& file,
225 bool include_source_code_info) {
226 FileDescriptorProto file_proto;
227 file.CopyTo(&file_proto);
228 if (include_source_code_info) {
229 file.CopySourceCodeInfoTo(&file_proto);
230 }
231 StripSourceRetentionOptions(*file.pool(), file_proto);
232 return file_proto;
233 }
234
StripSourceRetentionOptions(const DescriptorPool & pool,FileDescriptorProto & file_proto)235 void StripSourceRetentionOptions(const DescriptorPool& pool,
236 FileDescriptorProto& file_proto) {
237 std::vector<std::vector<int>> stripped_paths;
238 ConvertToDynamicMessageAndStripOptions(file_proto, pool, &stripped_paths);
239 if (file_proto.has_source_code_info()) {
240 StripSourceCodeInfo(stripped_paths, *file_proto.mutable_source_code_info());
241 }
242 }
243
StripSourceRetentionOptions(const Descriptor & message)244 DescriptorProto StripSourceRetentionOptions(const Descriptor& message) {
245 DescriptorProto message_proto;
246 message.CopyTo(&message_proto);
247 ConvertToDynamicMessageAndStripOptions(message_proto,
248 *message.file()->pool());
249 return message_proto;
250 }
251
StripSourceRetentionOptions(const Descriptor & message,const Descriptor::ExtensionRange & range)252 DescriptorProto::ExtensionRange StripSourceRetentionOptions(
253 const Descriptor& message, const Descriptor::ExtensionRange& range) {
254 DescriptorProto::ExtensionRange range_proto;
255 range.CopyTo(&range_proto);
256 ConvertToDynamicMessageAndStripOptions(range_proto, *message.file()->pool());
257 return range_proto;
258 }
259
StripSourceRetentionOptions(const EnumDescriptor & enm)260 EnumDescriptorProto StripSourceRetentionOptions(const EnumDescriptor& enm) {
261 EnumDescriptorProto enm_proto;
262 enm.CopyTo(&enm_proto);
263 ConvertToDynamicMessageAndStripOptions(enm_proto, *enm.file()->pool());
264 return enm_proto;
265 }
266
StripSourceRetentionOptions(const FieldDescriptor & field)267 FieldDescriptorProto StripSourceRetentionOptions(const FieldDescriptor& field) {
268 FieldDescriptorProto field_proto;
269 field.CopyTo(&field_proto);
270 ConvertToDynamicMessageAndStripOptions(field_proto, *field.file()->pool());
271 return field_proto;
272 }
273
StripSourceRetentionOptions(const OneofDescriptor & oneof)274 OneofDescriptorProto StripSourceRetentionOptions(const OneofDescriptor& oneof) {
275 OneofDescriptorProto oneof_proto;
276 oneof.CopyTo(&oneof_proto);
277 ConvertToDynamicMessageAndStripOptions(oneof_proto, *oneof.file()->pool());
278 return oneof_proto;
279 }
280
StripLocalSourceRetentionOptions(const EnumDescriptor & descriptor)281 EnumOptions StripLocalSourceRetentionOptions(const EnumDescriptor& descriptor) {
282 return StripLocalOptions(descriptor);
283 }
284
StripLocalSourceRetentionOptions(const EnumValueDescriptor & descriptor)285 EnumValueOptions StripLocalSourceRetentionOptions(
286 const EnumValueDescriptor& descriptor) {
287 return StripLocalOptions(descriptor);
288 }
289
StripLocalSourceRetentionOptions(const FieldDescriptor & descriptor)290 FieldOptions StripLocalSourceRetentionOptions(
291 const FieldDescriptor& descriptor) {
292 return StripLocalOptions(descriptor);
293 }
294
StripLocalSourceRetentionOptions(const FileDescriptor & descriptor)295 FileOptions StripLocalSourceRetentionOptions(const FileDescriptor& descriptor) {
296 return StripLocalOptions(descriptor);
297 }
298
StripLocalSourceRetentionOptions(const Descriptor & descriptor)299 MessageOptions StripLocalSourceRetentionOptions(const Descriptor& descriptor) {
300 return StripLocalOptions(descriptor);
301 }
302
StripLocalSourceRetentionOptions(const Descriptor & descriptor,const Descriptor::ExtensionRange & range)303 ExtensionRangeOptions StripLocalSourceRetentionOptions(
304 const Descriptor& descriptor, const Descriptor::ExtensionRange& range) {
305 ExtensionRangeOptions options = range.options();
306 ConvertToDynamicMessageAndStripOptions(options, GetPool(descriptor));
307 return options;
308 }
309
StripLocalSourceRetentionOptions(const MethodDescriptor & descriptor)310 MethodOptions StripLocalSourceRetentionOptions(
311 const MethodDescriptor& descriptor) {
312 return StripLocalOptions(descriptor);
313 }
314
StripLocalSourceRetentionOptions(const OneofDescriptor & descriptor)315 OneofOptions StripLocalSourceRetentionOptions(
316 const OneofDescriptor& descriptor) {
317 return StripLocalOptions(descriptor);
318 }
319
StripLocalSourceRetentionOptions(const ServiceDescriptor & descriptor)320 ServiceOptions StripLocalSourceRetentionOptions(
321 const ServiceDescriptor& descriptor) {
322 return StripLocalOptions(descriptor);
323 }
324
325 } // namespace compiler
326 } // namespace protobuf
327 } // namespace google
328