• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 #include "google/protobuf/compiler/retention.h"
9 
10 #include <algorithm>
11 #include <memory>
12 #include <string>
13 #include <utility>
14 #include <vector>
15 
16 #include "absl/container/flat_hash_set.h"
17 #include "absl/strings/match.h"
18 #include "absl/types/span.h"
19 #include "google/protobuf/descriptor.h"
20 #include "google/protobuf/dynamic_message.h"
21 
22 namespace google {
23 namespace protobuf {
24 namespace compiler {
25 
26 namespace {
27 
IsOptionsProto(const Message & m)28 bool IsOptionsProto(const Message& m) {
29   const Descriptor* descriptor = m.GetDescriptor();
30   return descriptor->file()->name() ==
31              DescriptorProto::descriptor()->file()->name() &&
32          absl::EndsWith(descriptor->name(), "Options");
33 }
34 
IsEmpty(const Message & m)35 bool IsEmpty(const Message& m) { return m.ByteSizeLong() == 0; }
36 
37 // Recursively strips any options with source retention from the message. If
38 // stripped_paths is not null, then this function will populate it with the
39 // paths that were stripped, using the path format from
40 // SourceCodeInfo.Location. The path parameter is used as a stack tracking the
41 // path to the current location.
StripMessage(Message & m,std::vector<int> & path,std::vector<std::vector<int>> * stripped_paths)42 void StripMessage(Message& m, std::vector<int>& path,
43                   std::vector<std::vector<int>>* stripped_paths) {
44   const Reflection* reflection = m.GetReflection();
45   std::vector<const FieldDescriptor*> fields;
46   reflection->ListFields(m, &fields);
47   for (const FieldDescriptor* field : fields) {
48     path.push_back(field->number());
49     if (field->options().retention() == FieldOptions::RETENTION_SOURCE) {
50       reflection->ClearField(&m, field);
51       if (stripped_paths != nullptr) {
52         stripped_paths->push_back(path);
53       }
54     } else if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
55       if (field->is_repeated()) {
56         int field_size = reflection->FieldSize(m, field);
57         for (int i = 0; i < field_size; ++i) {
58           path.push_back(i);
59           StripMessage(*reflection->MutableRepeatedMessage(&m, field, i), path,
60                        stripped_paths);
61           path.pop_back();
62         }
63       } else {
64         Message* child = reflection->MutableMessage(&m, field);
65         bool was_nonempty_options_proto =
66             IsOptionsProto(*child) && !IsEmpty(*child);
67         StripMessage(*child, path, stripped_paths);
68         // If this is an options message that became empty due to retention
69         // stripping, remove it.
70         if (was_nonempty_options_proto && IsEmpty(*child)) {
71           reflection->ClearField(&m, field);
72           if (stripped_paths != nullptr) {
73             stripped_paths->push_back(path);
74           }
75         }
76       }
77     }
78     path.pop_back();
79   }
80 }
81 
82 // Converts the descriptor to a dynamic message if necessary, and then strips
83 // out all source-retention options.
84 //
85 // The options message may have custom options set on it, and these would
86 // ordinarily appear as unknown fields since they are not linked into protoc.
87 // Using a dynamic message allows us to see these custom options. To convert
88 // back and forth between the generated type and the dynamic message, we have
89 // to serialize one and parse that into the other.
90 //
91 // If stripped_paths is not null, it will be populated with the paths that were
92 // stripped, using the path format from SourceCodeInfo.Location.
ConvertToDynamicMessageAndStripOptions(Message & m,const DescriptorPool & pool,std::vector<std::vector<int>> * stripped_paths=nullptr)93 void ConvertToDynamicMessageAndStripOptions(
94     Message& m, const DescriptorPool& pool,
95     std::vector<std::vector<int>>* stripped_paths = nullptr) {
96   // We need to look up the descriptor in the pool so that we can get a
97   // descriptor which knows about any custom options that were used in the
98   // .proto file.
99   const Descriptor* descriptor = pool.FindMessageTypeByName(m.GetTypeName());
100   std::vector<int> path;
101 
102   if (descriptor == nullptr || &pool == DescriptorPool::generated_pool()) {
103     // If the pool does not contain the descriptor, then this proto file does
104     // not transitively depend on descriptor.proto, in which case we know there
105     // are no custom options to worry about. If we are working with the
106     // generated pool, then we can still access any custom options without
107     // having to resort to DynamicMessage.
108     StripMessage(m, path, stripped_paths);
109   } else {
110     // To convert to a dynamic message, we need to serialize the original
111     // descriptor and parse it back again. This can fail if the descriptor is
112     // invalid, so in that case we try to handle it gracefully by stripping the
113     // original descriptor without using DynamicMessage. In this situation we
114     // will generally not be able to strip custom options, but we can at least
115     // strip built-in options.
116     DynamicMessageFactory factory;
117     std::unique_ptr<Message> dynamic_message(
118         factory.GetPrototype(descriptor)->New());
119     std::string serialized;
120     if (!m.SerializePartialToString(&serialized)) {
121       ABSL_LOG_EVERY_N_SEC(ERROR, 1)
122           << "Failed to fully strip source-retention options";
123       StripMessage(m, path, stripped_paths);
124       return;
125     }
126     if (!dynamic_message->ParsePartialFromString(serialized)) {
127       ABSL_LOG_EVERY_N_SEC(ERROR, 1)
128           << "Failed to fully strip source-retention options";
129       StripMessage(m, path, stripped_paths);
130       return;
131     }
132     StripMessage(*dynamic_message, path, stripped_paths);
133     if (!dynamic_message->SerializePartialToString(&serialized)) {
134       ABSL_LOG_EVERY_N_SEC(ERROR, 1)
135           << "Failed to fully strip source-retention options";
136       StripMessage(m, path, stripped_paths);
137       return;
138     }
139     if (!m.ParsePartialFromString(serialized)) {
140       ABSL_LOG_EVERY_N_SEC(ERROR, 1)
141           << "Failed to fully strip source-retention options";
142       StripMessage(m, path, stripped_paths);
143       return;
144     }
145   }
146 }
147 
148 // Returns a const reference to the descriptor pool associated with the given
149 // descriptor.
150 template <typename DescriptorType>
GetPool(const DescriptorType & descriptor)151 const google::protobuf::DescriptorPool& GetPool(const DescriptorType& descriptor) {
152   return *descriptor.file()->pool();
153 }
154 
155 // Specialization for FileDescriptor.
GetPool(const FileDescriptor & descriptor)156 const google::protobuf::DescriptorPool& GetPool(const FileDescriptor& descriptor) {
157   return *descriptor.pool();
158 }
159 
160 // Returns the options associated with the given descriptor, with all
161 // source-retention options stripped out.
162 template <typename DescriptorType>
StripLocalOptions(const DescriptorType & descriptor)163 auto StripLocalOptions(const DescriptorType& descriptor) {
164   auto options = descriptor.options();
165   ConvertToDynamicMessageAndStripOptions(options, GetPool(descriptor));
166   return options;
167 }
168 
169 // Returns true if x is a prefix of y.
IsPrefix(absl::Span<const int> x,absl::Span<const int> y)170 bool IsPrefix(absl::Span<const int> x, absl::Span<const int> y) {
171   return x == y.subspan(0, x.size());
172 }
173 
174 // Strips the paths in stripped_paths from the SourceCodeInfo.
StripSourceCodeInfo(std::vector<std::vector<int>> & stripped_paths,SourceCodeInfo & source_code_info)175 void StripSourceCodeInfo(std::vector<std::vector<int>>& stripped_paths,
176                          SourceCodeInfo& source_code_info) {
177   RepeatedPtrField<SourceCodeInfo::Location>* locations =
178       source_code_info.mutable_location();
179 
180   // We sort the locations lexicographically by their paths and include an
181   // index pointing back to the original location.
182   std::vector<std::pair<absl::Span<const int>, int>> sorted_locations;
183   sorted_locations.reserve(locations->size());
184   for (int i = 0; i < locations->size(); ++i) {
185     sorted_locations.emplace_back((*locations)[i].path(), i);
186   }
187   absl::c_sort(sorted_locations);
188   absl::c_sort(stripped_paths);
189 
190   // With both arrays sorted, we can efficiently step through them in tandem.
191   // If a stripped path is a prefix of any location, then that is a location
192   // we need to delete from the SourceCodeInfo.
193   absl::flat_hash_set<int> indices_to_delete;
194   auto i = stripped_paths.cbegin();
195   auto j = sorted_locations.cbegin();
196   while (i != stripped_paths.cend() && j != sorted_locations.cend()) {
197     if (IsPrefix(*i, j->first)) {
198       indices_to_delete.insert(j->second);
199       ++j;
200     } else if (*i < j->first) {
201       ++i;
202     } else {
203       ++j;
204     }
205   }
206 
207   // We delete the locations in descending order to avoid invalidating
208   // indices.
209   std::vector<SourceCodeInfo::Location*> old_locations;
210   old_locations.resize(locations->size());
211   locations->ExtractSubrange(0, locations->size(), old_locations.data());
212   locations->Reserve(old_locations.size() - indices_to_delete.size());
213   for (size_t i = 0; i < old_locations.size(); ++i) {
214     if (indices_to_delete.contains(i)) {
215       delete old_locations[i];
216     } else {
217       locations->AddAllocated(old_locations[i]);
218     }
219   }
220 }
221 
222 }  // namespace
223 
StripSourceRetentionOptions(const FileDescriptor & file,bool include_source_code_info)224 FileDescriptorProto StripSourceRetentionOptions(const FileDescriptor& file,
225                                                 bool include_source_code_info) {
226   FileDescriptorProto file_proto;
227   file.CopyTo(&file_proto);
228   if (include_source_code_info) {
229     file.CopySourceCodeInfoTo(&file_proto);
230   }
231   StripSourceRetentionOptions(*file.pool(), file_proto);
232   return file_proto;
233 }
234 
StripSourceRetentionOptions(const DescriptorPool & pool,FileDescriptorProto & file_proto)235 void StripSourceRetentionOptions(const DescriptorPool& pool,
236                                  FileDescriptorProto& file_proto) {
237   std::vector<std::vector<int>> stripped_paths;
238   ConvertToDynamicMessageAndStripOptions(file_proto, pool, &stripped_paths);
239   if (file_proto.has_source_code_info()) {
240     StripSourceCodeInfo(stripped_paths, *file_proto.mutable_source_code_info());
241   }
242 }
243 
StripSourceRetentionOptions(const Descriptor & message)244 DescriptorProto StripSourceRetentionOptions(const Descriptor& message) {
245   DescriptorProto message_proto;
246   message.CopyTo(&message_proto);
247   ConvertToDynamicMessageAndStripOptions(message_proto,
248                                          *message.file()->pool());
249   return message_proto;
250 }
251 
StripSourceRetentionOptions(const Descriptor & message,const Descriptor::ExtensionRange & range)252 DescriptorProto::ExtensionRange StripSourceRetentionOptions(
253     const Descriptor& message, const Descriptor::ExtensionRange& range) {
254   DescriptorProto::ExtensionRange range_proto;
255   range.CopyTo(&range_proto);
256   ConvertToDynamicMessageAndStripOptions(range_proto, *message.file()->pool());
257   return range_proto;
258 }
259 
StripSourceRetentionOptions(const EnumDescriptor & enm)260 EnumDescriptorProto StripSourceRetentionOptions(const EnumDescriptor& enm) {
261   EnumDescriptorProto enm_proto;
262   enm.CopyTo(&enm_proto);
263   ConvertToDynamicMessageAndStripOptions(enm_proto, *enm.file()->pool());
264   return enm_proto;
265 }
266 
StripSourceRetentionOptions(const FieldDescriptor & field)267 FieldDescriptorProto StripSourceRetentionOptions(const FieldDescriptor& field) {
268   FieldDescriptorProto field_proto;
269   field.CopyTo(&field_proto);
270   ConvertToDynamicMessageAndStripOptions(field_proto, *field.file()->pool());
271   return field_proto;
272 }
273 
StripSourceRetentionOptions(const OneofDescriptor & oneof)274 OneofDescriptorProto StripSourceRetentionOptions(const OneofDescriptor& oneof) {
275   OneofDescriptorProto oneof_proto;
276   oneof.CopyTo(&oneof_proto);
277   ConvertToDynamicMessageAndStripOptions(oneof_proto, *oneof.file()->pool());
278   return oneof_proto;
279 }
280 
StripLocalSourceRetentionOptions(const EnumDescriptor & descriptor)281 EnumOptions StripLocalSourceRetentionOptions(const EnumDescriptor& descriptor) {
282   return StripLocalOptions(descriptor);
283 }
284 
StripLocalSourceRetentionOptions(const EnumValueDescriptor & descriptor)285 EnumValueOptions StripLocalSourceRetentionOptions(
286     const EnumValueDescriptor& descriptor) {
287   return StripLocalOptions(descriptor);
288 }
289 
StripLocalSourceRetentionOptions(const FieldDescriptor & descriptor)290 FieldOptions StripLocalSourceRetentionOptions(
291     const FieldDescriptor& descriptor) {
292   return StripLocalOptions(descriptor);
293 }
294 
StripLocalSourceRetentionOptions(const FileDescriptor & descriptor)295 FileOptions StripLocalSourceRetentionOptions(const FileDescriptor& descriptor) {
296   return StripLocalOptions(descriptor);
297 }
298 
StripLocalSourceRetentionOptions(const Descriptor & descriptor)299 MessageOptions StripLocalSourceRetentionOptions(const Descriptor& descriptor) {
300   return StripLocalOptions(descriptor);
301 }
302 
StripLocalSourceRetentionOptions(const Descriptor & descriptor,const Descriptor::ExtensionRange & range)303 ExtensionRangeOptions StripLocalSourceRetentionOptions(
304     const Descriptor& descriptor, const Descriptor::ExtensionRange& range) {
305   ExtensionRangeOptions options = range.options();
306   ConvertToDynamicMessageAndStripOptions(options, GetPool(descriptor));
307   return options;
308 }
309 
StripLocalSourceRetentionOptions(const MethodDescriptor & descriptor)310 MethodOptions StripLocalSourceRetentionOptions(
311     const MethodDescriptor& descriptor) {
312   return StripLocalOptions(descriptor);
313 }
314 
StripLocalSourceRetentionOptions(const OneofDescriptor & descriptor)315 OneofOptions StripLocalSourceRetentionOptions(
316     const OneofDescriptor& descriptor) {
317   return StripLocalOptions(descriptor);
318 }
319 
StripLocalSourceRetentionOptions(const ServiceDescriptor & descriptor)320 ServiceOptions StripLocalSourceRetentionOptions(
321     const ServiceDescriptor& descriptor) {
322   return StripLocalOptions(descriptor);
323 }
324 
325 }  // namespace compiler
326 }  // namespace protobuf
327 }  // namespace google
328