• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: kenton@google.com (Kenton Varda)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 
12 #include "google/protobuf/compiler/csharp/csharp_helpers.h"
13 
14 #include <algorithm>
15 #include <limits>
16 #include <sstream>
17 #include <string>
18 #include <vector>
19 
20 #include "absl/container/flat_hash_set.h"
21 #include "absl/log/absl_log.h"
22 #include "absl/strings/ascii.h"
23 #include "absl/strings/str_replace.h"
24 #include "absl/strings/string_view.h"
25 #include "google/protobuf/compiler/csharp/csharp_enum_field.h"
26 #include "google/protobuf/compiler/csharp/csharp_field_base.h"
27 #include "google/protobuf/compiler/csharp/csharp_generator.h"
28 #include "google/protobuf/compiler/csharp/csharp_map_field.h"
29 #include "google/protobuf/compiler/csharp/csharp_message_field.h"
30 #include "google/protobuf/compiler/csharp/csharp_options.h"
31 #include "google/protobuf/compiler/csharp/csharp_primitive_field.h"
32 #include "google/protobuf/compiler/csharp/csharp_repeated_enum_field.h"
33 #include "google/protobuf/compiler/csharp/csharp_repeated_message_field.h"
34 #include "google/protobuf/compiler/csharp/csharp_repeated_primitive_field.h"
35 #include "google/protobuf/compiler/csharp/csharp_wrapper_field.h"
36 #include "google/protobuf/compiler/csharp/names.h"
37 #include "google/protobuf/compiler/retention.h"
38 #include "google/protobuf/descriptor.h"
39 #include "google/protobuf/descriptor.pb.h"
40 
41 // Must be last.
42 #include "google/protobuf/port_def.inc"
43 
44 namespace google {
45 namespace protobuf {
46 namespace compiler {
47 namespace csharp {
48 
GetCSharpType(FieldDescriptor::Type type)49 CSharpType GetCSharpType(FieldDescriptor::Type type) {
50   switch (type) {
51     case FieldDescriptor::TYPE_INT32:
52       return CSHARPTYPE_INT32;
53     case FieldDescriptor::TYPE_INT64:
54       return CSHARPTYPE_INT64;
55     case FieldDescriptor::TYPE_UINT32:
56       return CSHARPTYPE_UINT32;
57     case FieldDescriptor::TYPE_UINT64:
58       return CSHARPTYPE_UINT32;
59     case FieldDescriptor::TYPE_SINT32:
60       return CSHARPTYPE_INT32;
61     case FieldDescriptor::TYPE_SINT64:
62       return CSHARPTYPE_INT64;
63     case FieldDescriptor::TYPE_FIXED32:
64       return CSHARPTYPE_UINT32;
65     case FieldDescriptor::TYPE_FIXED64:
66       return CSHARPTYPE_UINT64;
67     case FieldDescriptor::TYPE_SFIXED32:
68       return CSHARPTYPE_INT32;
69     case FieldDescriptor::TYPE_SFIXED64:
70       return CSHARPTYPE_INT64;
71     case FieldDescriptor::TYPE_FLOAT:
72       return CSHARPTYPE_FLOAT;
73     case FieldDescriptor::TYPE_DOUBLE:
74       return CSHARPTYPE_DOUBLE;
75     case FieldDescriptor::TYPE_BOOL:
76       return CSHARPTYPE_BOOL;
77     case FieldDescriptor::TYPE_ENUM:
78       return CSHARPTYPE_ENUM;
79     case FieldDescriptor::TYPE_STRING:
80       return CSHARPTYPE_STRING;
81     case FieldDescriptor::TYPE_BYTES:
82       return CSHARPTYPE_BYTESTRING;
83     case FieldDescriptor::TYPE_GROUP:
84       return CSHARPTYPE_MESSAGE;
85     case FieldDescriptor::TYPE_MESSAGE:
86       return CSHARPTYPE_MESSAGE;
87 
88       // No default because we want the compiler to complain if any new
89       // types are added.
90   }
91   ABSL_LOG(FATAL) << "Can't get here.";
92   return (CSharpType)-1;
93 }
94 
95 // Convert a string which is expected to be SHOUTY_CASE (but may not be
96 // *precisely* shouty) into a PascalCase string. Precise rules implemented:
97 
98 // Previous input character      Current character         Case
99 // Any                           Non-alphanumeric          Skipped
100 // None - first char of input    Alphanumeric              Upper
101 // Non-letter (e.g. _ or 1)      Alphanumeric              Upper
102 // Numeric                       Alphanumeric              Upper
103 // Lower letter                  Alphanumeric              Same as current
104 // Upper letter                  Alphanumeric              Lower
ShoutyToPascalCase(absl::string_view input)105 std::string ShoutyToPascalCase(absl::string_view input) {
106   std::string result;
107   // Simple way of implementing "always start with upper"
108   char previous = '_';
109   for (int i = 0; i < input.size(); i++) {
110     char current = input[i];
111     if (!absl::ascii_isalnum(current)) {
112       previous = current;
113       continue;
114     }
115     if (!absl::ascii_isalnum(previous)) {
116       result += absl::ascii_toupper(current);
117     } else if (absl::ascii_isdigit(previous)) {
118       result += absl::ascii_toupper(current);
119     } else if (absl::ascii_islower(previous)) {
120       result += current;
121     } else {
122       result += absl::ascii_tolower(current);
123     }
124     previous = current;
125   }
126   return result;
127 }
128 
129 // Attempt to remove a prefix from a value, ignoring casing and skipping
130 // underscores. (foo, foo_bar) => bar - underscore after prefix is skipped (FOO,
131 // foo_bar) => bar - casing is ignored (foo_bar, foobarbaz) => baz - underscore
132 // in prefix is ignored (foobar, foo_barbaz) => baz - underscore in value is
133 // ignored (foo, bar) => bar - prefix isn't matched; return original value
TryRemovePrefix(absl::string_view prefix,absl::string_view value)134 std::string TryRemovePrefix(absl::string_view prefix, absl::string_view value) {
135   // First normalize to a lower-case no-underscores prefix to match against
136   std::string prefix_to_match = "";
137   for (size_t i = 0; i < prefix.size(); i++) {
138     if (prefix[i] != '_') {
139       prefix_to_match += absl::ascii_tolower(prefix[i]);
140     }
141   }
142 
143   // This keeps track of how much of value we've consumed
144   size_t prefix_index, value_index;
145   for (prefix_index = 0, value_index = 0;
146        prefix_index < prefix_to_match.size() && value_index < value.size();
147        value_index++) {
148     // Skip over underscores in the value
149     if (value[value_index] == '_') {
150       continue;
151     }
152     if (absl::ascii_tolower(value[value_index]) !=
153         prefix_to_match[prefix_index++]) {
154       // Failed to match the prefix - bail out early.
155       return std::string(value);
156     }
157   }
158 
159   // If we didn't finish looking through the prefix, we can't strip it.
160   if (prefix_index < prefix_to_match.size()) {
161     return std::string(value);
162   }
163 
164   // Step over any underscores after the prefix
165   while (value_index < value.size() && value[value_index] == '_') {
166     value_index++;
167   }
168 
169   // If there's nothing left (e.g. it was a prefix with only underscores
170   // afterwards), don't strip.
171   if (value_index == value.size()) {
172     return std::string(value);
173   }
174 
175   return std::string(value.substr(value_index));
176 }
177 
178 // Format the enum value name in a pleasant way for C#:
179 // - Strip the enum name as a prefix if possible
180 // - Convert to PascalCase.
181 // For example, an enum called Color with a value of COLOR_BLUE should
182 // result in an enum value in C# called just Blue
GetEnumValueName(absl::string_view enum_name,absl::string_view enum_value_name)183 std::string GetEnumValueName(absl::string_view enum_name,
184                              absl::string_view enum_value_name) {
185   std::string stripped = TryRemovePrefix(enum_name, enum_value_name);
186   std::string result = ShoutyToPascalCase(stripped);
187   // Just in case we have an enum name of FOO and a value of FOO_2... make sure
188   // the returned string is a valid identifier.
189   if (absl::ascii_isdigit(result[0])) {
190     return absl::StrCat("_", result);
191   }
192   return result;
193 }
194 
GetFullExtensionName(const FieldDescriptor * descriptor)195 std::string GetFullExtensionName(const FieldDescriptor* descriptor) {
196   if (descriptor->extension_scope()) {
197     return absl::StrCat(GetClassName(descriptor->extension_scope()),
198                         ".Extensions.", GetPropertyName(descriptor));
199   }
200 
201   return absl::StrCat(GetExtensionClassUnqualifiedName(descriptor->file()), ".",
202                       GetPropertyName(descriptor));
203 }
204 
205 // Groups in proto2 are hacky: The name of the field is just the lower-cased
206 // name of the group type. In C#, though, we would like to retain the original
207 // capitalization of the type name. Fields with an encoding of "delimited" in
208 // editions are like groups, but have a real name, so we use that.
GetFieldName(const FieldDescriptor * descriptor)209 std::string GetFieldName(const FieldDescriptor* descriptor) {
210   if (internal::cpp::IsGroupLike(*descriptor)) {
211     return std::string(descriptor->message_type()->name());
212   } else {
213     return std::string(descriptor->name());
214   }
215 }
216 
GetFieldConstantName(const FieldDescriptor * field)217 std::string GetFieldConstantName(const FieldDescriptor* field) {
218   return absl::StrCat(GetPropertyName(field), "FieldNumber");
219 }
220 
GetPropertyName(const FieldDescriptor * descriptor)221 std::string GetPropertyName(const FieldDescriptor* descriptor) {
222   // Names of members declared or overridden in the message.
223   static const auto& reserved_member_names =
224       *new absl::flat_hash_set<absl::string_view>(
225           {"Types", "Descriptor", "Equals", "ToString", "GetHashCode",
226            "WriteTo", "Clone", "CalculateSize", "MergeFrom", "OnConstruction",
227            "Parser"});
228 
229   // TODO: consider introducing csharp_property_name field option
230   std::string property_name = UnderscoresToPascalCase(GetFieldName(descriptor));
231   // Avoid either our own type name or reserved names.
232   // There are various ways of ending up with naming collisions, but we try to
233   // avoid obvious ones. In particular, we avoid the names of all the members we
234   // generate. Note that we *don't* add an underscore for MemberwiseClone or
235   // GetType. Those generate warnings, but not errors; changing the name now
236   // could be a breaking change.
237   if (property_name == descriptor->containing_type()->name() ||
238       reserved_member_names.find(property_name) !=
239           reserved_member_names.end()) {
240     absl::StrAppend(&property_name, "_");
241   }
242   return property_name;
243 }
244 
GetOneofCaseName(const FieldDescriptor * descriptor)245 std::string GetOneofCaseName(const FieldDescriptor* descriptor) {
246   // The name in a oneof case enum is the same as for the property, but as we
247   // always have a "None" value as well, we need to reserve that by appending an
248   // underscore.
249   std::string property_name = GetPropertyName(descriptor);
250   return property_name == "None" ? "None_" : property_name;
251 }
252 
253 // TODO: c&p from Java protoc plugin
254 // For encodings with fixed sizes, returns that size in bytes.  Otherwise
255 // returns -1.
GetFixedSize(FieldDescriptor::Type type)256 int GetFixedSize(FieldDescriptor::Type type) {
257   switch (type) {
258     case FieldDescriptor::TYPE_INT32:
259       return -1;
260     case FieldDescriptor::TYPE_INT64:
261       return -1;
262     case FieldDescriptor::TYPE_UINT32:
263       return -1;
264     case FieldDescriptor::TYPE_UINT64:
265       return -1;
266     case FieldDescriptor::TYPE_SINT32:
267       return -1;
268     case FieldDescriptor::TYPE_SINT64:
269       return -1;
270     case FieldDescriptor::TYPE_FIXED32:
271       return internal::WireFormatLite::kFixed32Size;
272     case FieldDescriptor::TYPE_FIXED64:
273       return internal::WireFormatLite::kFixed64Size;
274     case FieldDescriptor::TYPE_SFIXED32:
275       return internal::WireFormatLite::kSFixed32Size;
276     case FieldDescriptor::TYPE_SFIXED64:
277       return internal::WireFormatLite::kSFixed64Size;
278     case FieldDescriptor::TYPE_FLOAT:
279       return internal::WireFormatLite::kFloatSize;
280     case FieldDescriptor::TYPE_DOUBLE:
281       return internal::WireFormatLite::kDoubleSize;
282 
283     case FieldDescriptor::TYPE_BOOL:
284       return internal::WireFormatLite::kBoolSize;
285     case FieldDescriptor::TYPE_ENUM:
286       return -1;
287 
288     case FieldDescriptor::TYPE_STRING:
289       return -1;
290     case FieldDescriptor::TYPE_BYTES:
291       return -1;
292     case FieldDescriptor::TYPE_GROUP:
293       return -1;
294     case FieldDescriptor::TYPE_MESSAGE:
295       return -1;
296 
297       // No default because we want the compiler to complain if any new
298       // types are added.
299   }
300   ABSL_LOG(FATAL) << "Can't get here.";
301   return -1;
302 }
303 
304 static const char base64_chars[] =
305     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
306 
StringToBase64(absl::string_view input)307 std::string StringToBase64(absl::string_view input) {
308   std::string result;
309   size_t remaining = input.size();
310   const unsigned char* src = (const unsigned char*)input.data();
311   while (remaining > 2) {
312     result += base64_chars[src[0] >> 2];
313     result += base64_chars[((src[0] & 0x3) << 4) | (src[1] >> 4)];
314     result += base64_chars[((src[1] & 0xf) << 2) | (src[2] >> 6)];
315     result += base64_chars[src[2] & 0x3f];
316     remaining -= 3;
317     src += 3;
318   }
319   switch (remaining) {
320     case 2:
321       result += base64_chars[src[0] >> 2];
322       result += base64_chars[((src[0] & 0x3) << 4) | (src[1] >> 4)];
323       result += base64_chars[(src[1] & 0xf) << 2];
324       result += '=';
325       src += 2;
326       break;
327     case 1:
328       result += base64_chars[src[0] >> 2];
329       result += base64_chars[((src[0] & 0x3) << 4)];
330       result += '=';
331       result += '=';
332       src += 1;
333       break;
334   }
335   return result;
336 }
337 
FileDescriptorToBase64(const FileDescriptor * descriptor)338 std::string FileDescriptorToBase64(const FileDescriptor* descriptor) {
339   std::string fdp_bytes;
340   FileDescriptorProto fdp = StripSourceRetentionOptions(*descriptor);
341   fdp.SerializeToString(&fdp_bytes);
342   return StringToBase64(fdp_bytes);
343 }
344 
CreateFieldGenerator(const FieldDescriptor * descriptor,int presenceIndex,const Options * options)345 FieldGeneratorBase* CreateFieldGenerator(const FieldDescriptor* descriptor,
346                                          int presenceIndex,
347                                          const Options* options) {
348   switch (descriptor->type()) {
349     case FieldDescriptor::TYPE_GROUP:
350     case FieldDescriptor::TYPE_MESSAGE:
351       if (descriptor->is_repeated()) {
352         if (descriptor->is_map()) {
353           return new MapFieldGenerator(descriptor, presenceIndex, options);
354         } else {
355           return new RepeatedMessageFieldGenerator(descriptor, presenceIndex,
356                                                    options);
357         }
358       } else {
359         if (IsWrapperType(descriptor)) {
360           if (descriptor->real_containing_oneof()) {
361             return new WrapperOneofFieldGenerator(descriptor, presenceIndex,
362                                                   options);
363           } else {
364             return new WrapperFieldGenerator(descriptor, presenceIndex,
365                                              options);
366           }
367         } else {
368           if (descriptor->real_containing_oneof()) {
369             return new MessageOneofFieldGenerator(descriptor, presenceIndex,
370                                                   options);
371           } else {
372             return new MessageFieldGenerator(descriptor, presenceIndex,
373                                              options);
374           }
375         }
376       }
377     case FieldDescriptor::TYPE_ENUM:
378       if (descriptor->is_repeated()) {
379         return new RepeatedEnumFieldGenerator(descriptor, presenceIndex,
380                                               options);
381       } else {
382         if (descriptor->real_containing_oneof()) {
383           return new EnumOneofFieldGenerator(descriptor, presenceIndex,
384                                              options);
385         } else {
386           return new EnumFieldGenerator(descriptor, presenceIndex, options);
387         }
388       }
389     default:
390       if (descriptor->is_repeated()) {
391         return new RepeatedPrimitiveFieldGenerator(descriptor, presenceIndex,
392                                                    options);
393       } else {
394         if (descriptor->real_containing_oneof()) {
395           return new PrimitiveOneofFieldGenerator(descriptor, presenceIndex,
396                                                   options);
397         } else {
398           return new PrimitiveFieldGenerator(descriptor, presenceIndex,
399                                              options);
400         }
401       }
402   }
403 }
404 
IsNullable(const FieldDescriptor * descriptor)405 bool IsNullable(const FieldDescriptor* descriptor) {
406   if (descriptor->is_repeated()) {
407     return true;
408   }
409 
410   switch (descriptor->type()) {
411     case FieldDescriptor::TYPE_ENUM:
412     case FieldDescriptor::TYPE_DOUBLE:
413     case FieldDescriptor::TYPE_FLOAT:
414     case FieldDescriptor::TYPE_INT64:
415     case FieldDescriptor::TYPE_UINT64:
416     case FieldDescriptor::TYPE_INT32:
417     case FieldDescriptor::TYPE_FIXED64:
418     case FieldDescriptor::TYPE_FIXED32:
419     case FieldDescriptor::TYPE_BOOL:
420     case FieldDescriptor::TYPE_UINT32:
421     case FieldDescriptor::TYPE_SFIXED32:
422     case FieldDescriptor::TYPE_SFIXED64:
423     case FieldDescriptor::TYPE_SINT32:
424     case FieldDescriptor::TYPE_SINT64:
425       return false;
426 
427     case FieldDescriptor::TYPE_MESSAGE:
428     case FieldDescriptor::TYPE_GROUP:
429     case FieldDescriptor::TYPE_STRING:
430     case FieldDescriptor::TYPE_BYTES:
431       return true;
432 
433     default:
434       ABSL_LOG(FATAL) << "Unknown field type.";
435       return true;
436   }
437 }
438 
439 }  // namespace csharp
440 }  // namespace compiler
441 }  // namespace protobuf
442 }  // namespace google
443 
444 #include "google/protobuf/port_undef.inc"
445