1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <algorithm>
36 #include <google/protobuf/stubs/hash.h>
37 #include <limits>
38 #include <vector>
39
40 #include <google/protobuf/compiler/csharp/csharp_helpers.h>
41 #include <google/protobuf/descriptor.pb.h>
42 #include <google/protobuf/io/printer.h>
43 #include <google/protobuf/wire_format.h>
44 #include <google/protobuf/stubs/strutil.h>
45 #include <google/protobuf/stubs/substitute.h>
46
47 #include <google/protobuf/compiler/csharp/csharp_field_base.h>
48 #include <google/protobuf/compiler/csharp/csharp_enum_field.h>
49 #include <google/protobuf/compiler/csharp/csharp_map_field.h>
50 #include <google/protobuf/compiler/csharp/csharp_message_field.h>
51 #include <google/protobuf/compiler/csharp/csharp_options.h>
52 #include <google/protobuf/compiler/csharp/csharp_primitive_field.h>
53 #include <google/protobuf/compiler/csharp/csharp_repeated_enum_field.h>
54 #include <google/protobuf/compiler/csharp/csharp_repeated_message_field.h>
55 #include <google/protobuf/compiler/csharp/csharp_repeated_primitive_field.h>
56 #include <google/protobuf/compiler/csharp/csharp_wrapper_field.h>
57
58 namespace google {
59 namespace protobuf {
60 namespace compiler {
61 namespace csharp {
62
GetCSharpType(FieldDescriptor::Type type)63 CSharpType GetCSharpType(FieldDescriptor::Type type) {
64 switch (type) {
65 case FieldDescriptor::TYPE_INT32:
66 return CSHARPTYPE_INT32;
67 case FieldDescriptor::TYPE_INT64:
68 return CSHARPTYPE_INT64;
69 case FieldDescriptor::TYPE_UINT32:
70 return CSHARPTYPE_UINT32;
71 case FieldDescriptor::TYPE_UINT64:
72 return CSHARPTYPE_UINT32;
73 case FieldDescriptor::TYPE_SINT32:
74 return CSHARPTYPE_INT32;
75 case FieldDescriptor::TYPE_SINT64:
76 return CSHARPTYPE_INT64;
77 case FieldDescriptor::TYPE_FIXED32:
78 return CSHARPTYPE_UINT32;
79 case FieldDescriptor::TYPE_FIXED64:
80 return CSHARPTYPE_UINT64;
81 case FieldDescriptor::TYPE_SFIXED32:
82 return CSHARPTYPE_INT32;
83 case FieldDescriptor::TYPE_SFIXED64:
84 return CSHARPTYPE_INT64;
85 case FieldDescriptor::TYPE_FLOAT:
86 return CSHARPTYPE_FLOAT;
87 case FieldDescriptor::TYPE_DOUBLE:
88 return CSHARPTYPE_DOUBLE;
89 case FieldDescriptor::TYPE_BOOL:
90 return CSHARPTYPE_BOOL;
91 case FieldDescriptor::TYPE_ENUM:
92 return CSHARPTYPE_ENUM;
93 case FieldDescriptor::TYPE_STRING:
94 return CSHARPTYPE_STRING;
95 case FieldDescriptor::TYPE_BYTES:
96 return CSHARPTYPE_BYTESTRING;
97 case FieldDescriptor::TYPE_GROUP:
98 return CSHARPTYPE_MESSAGE;
99 case FieldDescriptor::TYPE_MESSAGE:
100 return CSHARPTYPE_MESSAGE;
101
102 // No default because we want the compiler to complain if any new
103 // types are added.
104 }
105 GOOGLE_LOG(FATAL)<< "Can't get here.";
106 return (CSharpType) -1;
107 }
108
// Returns proto_file with its last '.' and everything after it removed,
// e.g. "foo/bar.proto" -> "foo/bar". When the input contains no '.', it is
// returned unchanged.
std::string StripDotProto(const std::string& proto_file) {
  // Keep the position in the string's own size_type: squeezing npos through
  // an int only round-trips via an implementation-defined conversion.
  const std::string::size_type last_dot = proto_file.find_last_of('.');
  if (last_dot == std::string::npos) {
    return proto_file;
  }
  return proto_file.substr(0, last_dot);
}
113
GetFileNamespace(const FileDescriptor * descriptor)114 std::string GetFileNamespace(const FileDescriptor* descriptor) {
115 if (descriptor->options().has_csharp_namespace()) {
116 return descriptor->options().csharp_namespace();
117 }
118 return UnderscoresToCamelCase(descriptor->package(), true, true);
119 }
120
121 // Returns the Pascal-cased last part of the proto file. For example,
122 // input of "google/protobuf/foo_bar.proto" would result in "FooBar".
GetFileNameBase(const FileDescriptor * descriptor)123 std::string GetFileNameBase(const FileDescriptor* descriptor) {
124 std::string proto_file = descriptor->name();
125 int lastslash = proto_file.find_last_of("/");
126 std::string base = proto_file.substr(lastslash + 1);
127 return UnderscoresToPascalCase(StripDotProto(base));
128 }
129
GetReflectionClassUnqualifiedName(const FileDescriptor * descriptor)130 std::string GetReflectionClassUnqualifiedName(const FileDescriptor* descriptor) {
131 // TODO: Detect collisions with existing messages,
132 // and append an underscore if necessary.
133 return GetFileNameBase(descriptor) + "Reflection";
134 }
135
// TODO(jtattermusch): can we reuse a utility function?
//
// Converts an identifier to camel case.
//  - Lower-case ASCII letters are upper-cased when cap_next_letter is set,
//    otherwise copied.
//  - An upper-case letter at index 0 is lower-cased unless cap_next_letter is
//    set; later upper-case letters are copied as-is.
//  - Digits are copied and make the next letter capitalized.
//  - Every other character is dropped and makes the next letter capitalized,
//    except that '.' is kept when preserve_period is true.
//  - If the input ends in '#', a trailing '_' is appended to the result.
// An empty input yields an empty result.
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter,
                                   bool preserve_period) {
  std::string result;
  // Note: I distrust ctype.h due to locales.
  for (std::string::size_type i = 0; i < input.size(); i++) {
    const char c = input[i];
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Force first letter to lower-case unless explicitly told to
      // capitalize it. Capital letters after the first are left as-is.
      const bool lower_first = (i == 0 && !cap_next_letter);
      result += lower_first ? static_cast<char>(c + ('a' - 'A')) : c;
      cap_next_letter = false;
    } else if ('0' <= c && c <= '9') {
      result += c;
      cap_next_letter = true;
    } else {
      cap_next_letter = true;
      if (c == '.' && preserve_period) {
        result += '.';
      }
    }
  }
  // Add a trailing "_" if the name should be altered. The emptiness check
  // guards the last-character read: size() - 1 would wrap around for "".
  if (!input.empty() && input[input.size() - 1] == '#') {
    result += '_';
  }
  return result;
}
176
UnderscoresToPascalCase(const std::string & input)177 std::string UnderscoresToPascalCase(const std::string& input) {
178 return UnderscoresToCamelCase(input, true);
179 }
180
181 // Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty)
182 // into a PascalCase string. Precise rules implemented:
183
184 // Previous input character Current character Case
185 // Any Non-alphanumeric Skipped
186 // None - first char of input Alphanumeric Upper
187 // Non-letter (e.g. _ or 1) Alphanumeric Upper
188 // Numeric Alphanumeric Upper
189 // Lower letter Alphanumeric Same as current
190 // Upper letter Alphanumeric Lower
ShoutyToPascalCase(const std::string & input)191 std::string ShoutyToPascalCase(const std::string& input) {
192 string result;
193 // Simple way of implementing "always start with upper"
194 char previous = '_';
195 for (int i = 0; i < input.size(); i++) {
196 char current = input[i];
197 if (!ascii_isalnum(current)) {
198 previous = current;
199 continue;
200 }
201 if (!ascii_isalnum(previous)) {
202 result += ascii_toupper(current);
203 } else if (ascii_isdigit(previous)) {
204 result += ascii_toupper(current);
205 } else if (ascii_islower(previous)) {
206 result += current;
207 } else {
208 result += ascii_tolower(current);
209 }
210 previous = current;
211 }
212 return result;
213 }
214
215 // Attempt to remove a prefix from a value, ignoring casing and skipping underscores.
216 // (foo, foo_bar) => bar - underscore after prefix is skipped
217 // (FOO, foo_bar) => bar - casing is ignored
218 // (foo_bar, foobarbaz) => baz - underscore in prefix is ignored
219 // (foobar, foo_barbaz) => baz - underscore in value is ignored
220 // (foo, bar) => bar - prefix isn't matched; return original value
TryRemovePrefix(const std::string & prefix,const std::string & value)221 std::string TryRemovePrefix(const std::string& prefix, const std::string& value) {
222 // First normalize to a lower-case no-underscores prefix to match against
223 std::string prefix_to_match = "";
224 for (size_t i = 0; i < prefix.size(); i++) {
225 if (prefix[i] != '_') {
226 prefix_to_match += ascii_tolower(prefix[i]);
227 }
228 }
229
230 // This keeps track of how much of value we've consumed
231 size_t prefix_index, value_index;
232 for (prefix_index = 0, value_index = 0;
233 prefix_index < prefix_to_match.size() && value_index < value.size();
234 value_index++) {
235 // Skip over underscores in the value
236 if (value[value_index] == '_') {
237 continue;
238 }
239 if (ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) {
240 // Failed to match the prefix - bail out early.
241 return value;
242 }
243 }
244
245 // If we didn't finish looking through the prefix, we can't strip it.
246 if (prefix_index < prefix_to_match.size()) {
247 return value;
248 }
249
250 // Step over any underscores after the prefix
251 while (value_index < value.size() && value[value_index] == '_') {
252 value_index++;
253 }
254
255 // If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip.
256 if (value_index == value.size()) {
257 return value;
258 }
259
260 return value.substr(value_index);
261 }
262
263 // Format the enum value name in a pleasant way for C#:
264 // - Strip the enum name as a prefix if possible
265 // - Convert to PascalCase.
266 // For example, an enum called Color with a value of COLOR_BLUE should
267 // result in an enum value in C# called just Blue
GetEnumValueName(const std::string & enum_name,const std::string & enum_value_name)268 std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name) {
269 std::string stripped = TryRemovePrefix(enum_name, enum_value_name);
270 std::string result = ShoutyToPascalCase(stripped);
271 // Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned
272 // string is a valid identifier.
273 if (ascii_isdigit(result[0])) {
274 result = "_" + result;
275 }
276 return result;
277 }
278
ToCSharpName(const std::string & name,const FileDescriptor * file)279 std::string ToCSharpName(const std::string& name, const FileDescriptor* file) {
280 std::string result = GetFileNamespace(file);
281 if (result != "") {
282 result += '.';
283 }
284 string classname;
285 if (file->package().empty()) {
286 classname = name;
287 } else {
288 // Strip the proto package from full_name since we've replaced it with
289 // the C# namespace.
290 classname = name.substr(file->package().size() + 1);
291 }
292 result += StringReplace(classname, ".", ".Types.", true);
293 return "global::" + result;
294 }
295
GetReflectionClassName(const FileDescriptor * descriptor)296 std::string GetReflectionClassName(const FileDescriptor* descriptor) {
297 std::string result = GetFileNamespace(descriptor);
298 if (!result.empty()) {
299 result += '.';
300 }
301 result += GetReflectionClassUnqualifiedName(descriptor);
302 return "global::" + result;
303 }
304
GetClassName(const Descriptor * descriptor)305 std::string GetClassName(const Descriptor* descriptor) {
306 return ToCSharpName(descriptor->full_name(), descriptor->file());
307 }
308
GetClassName(const EnumDescriptor * descriptor)309 std::string GetClassName(const EnumDescriptor* descriptor) {
310 return ToCSharpName(descriptor->full_name(), descriptor->file());
311 }
312
313 // Groups are hacky: The name of the field is just the lower-cased name
314 // of the group type. In C#, though, we would like to retain the original
315 // capitalization of the type name.
GetFieldName(const FieldDescriptor * descriptor)316 std::string GetFieldName(const FieldDescriptor* descriptor) {
317 if (descriptor->type() == FieldDescriptor::TYPE_GROUP) {
318 return descriptor->message_type()->name();
319 } else {
320 return descriptor->name();
321 }
322 }
323
GetFieldConstantName(const FieldDescriptor * field)324 std::string GetFieldConstantName(const FieldDescriptor* field) {
325 return GetPropertyName(field) + "FieldNumber";
326 }
327
GetPropertyName(const FieldDescriptor * descriptor)328 std::string GetPropertyName(const FieldDescriptor* descriptor) {
329 // TODO(jtattermusch): consider introducing csharp_property_name field option
330 std::string property_name = UnderscoresToPascalCase(GetFieldName(descriptor));
331 // Avoid either our own type name or reserved names. Note that not all names
332 // are reserved - a field called to_string, write_to etc would still cause a problem.
333 // There are various ways of ending up with naming collisions, but we try to avoid obvious
334 // ones.
335 if (property_name == descriptor->containing_type()->name()
336 || property_name == "Types"
337 || property_name == "Descriptor") {
338 property_name += "_";
339 }
340 return property_name;
341 }
342
GetOutputFile(const google::protobuf::FileDescriptor * descriptor,const std::string file_extension,const bool generate_directories,const std::string base_namespace,string * error)343 std::string GetOutputFile(
344 const google::protobuf::FileDescriptor* descriptor,
345 const std::string file_extension,
346 const bool generate_directories,
347 const std::string base_namespace,
348 string* error) {
349 string relative_filename = GetFileNameBase(descriptor) + file_extension;
350 if (!generate_directories) {
351 return relative_filename;
352 }
353 string ns = GetFileNamespace(descriptor);
354 string namespace_suffix = ns;
355 if (!base_namespace.empty()) {
356 // Check that the base_namespace is either equal to or a leading part of
357 // the file namespace. This isn't just a simple prefix; "Foo.B" shouldn't
358 // be regarded as a prefix of "Foo.Bar". The simplest option is to add "."
359 // to both.
360 string extended_ns = ns + ".";
361 if (extended_ns.find(base_namespace + ".") != 0) {
362 *error = "Namespace " + ns + " is not a prefix namespace of base namespace " + base_namespace;
363 return ""; // This will be ignored, because we've set an error.
364 }
365 namespace_suffix = ns.substr(base_namespace.length());
366 if (namespace_suffix.find(".") == 0) {
367 namespace_suffix = namespace_suffix.substr(1);
368 }
369 }
370
371 string namespace_dir = StringReplace(namespace_suffix, ".", "/", true);
372 if (!namespace_dir.empty()) {
373 namespace_dir += "/";
374 }
375 return namespace_dir + relative_filename;
376 }
377
378 // TODO: c&p from Java protoc plugin
379 // For encodings with fixed sizes, returns that size in bytes. Otherwise
380 // returns -1.
GetFixedSize(FieldDescriptor::Type type)381 int GetFixedSize(FieldDescriptor::Type type) {
382 switch (type) {
383 case FieldDescriptor::TYPE_INT32 : return -1;
384 case FieldDescriptor::TYPE_INT64 : return -1;
385 case FieldDescriptor::TYPE_UINT32 : return -1;
386 case FieldDescriptor::TYPE_UINT64 : return -1;
387 case FieldDescriptor::TYPE_SINT32 : return -1;
388 case FieldDescriptor::TYPE_SINT64 : return -1;
389 case FieldDescriptor::TYPE_FIXED32 : return internal::WireFormatLite::kFixed32Size;
390 case FieldDescriptor::TYPE_FIXED64 : return internal::WireFormatLite::kFixed64Size;
391 case FieldDescriptor::TYPE_SFIXED32: return internal::WireFormatLite::kSFixed32Size;
392 case FieldDescriptor::TYPE_SFIXED64: return internal::WireFormatLite::kSFixed64Size;
393 case FieldDescriptor::TYPE_FLOAT : return internal::WireFormatLite::kFloatSize;
394 case FieldDescriptor::TYPE_DOUBLE : return internal::WireFormatLite::kDoubleSize;
395
396 case FieldDescriptor::TYPE_BOOL : return internal::WireFormatLite::kBoolSize;
397 case FieldDescriptor::TYPE_ENUM : return -1;
398
399 case FieldDescriptor::TYPE_STRING : return -1;
400 case FieldDescriptor::TYPE_BYTES : return -1;
401 case FieldDescriptor::TYPE_GROUP : return -1;
402 case FieldDescriptor::TYPE_MESSAGE : return -1;
403
404 // No default because we want the compiler to complain if any new
405 // types are added.
406 }
407 GOOGLE_LOG(FATAL) << "Can't get here.";
408 return -1;
409 }
410
// The 64-character alphabet used by StringToBase64, indexed by 6-bit value.
static const char base64_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Encodes the bytes of `input` as base64 using base64_chars, padding the
// output with '=' so its length is a multiple of four. An empty input
// produces an empty string.
std::string StringToBase64(const std::string& input) {
  const unsigned char* bytes =
      reinterpret_cast<const unsigned char*>(input.c_str());
  const size_t length = input.size();
  const size_t tail = length % 3;            // bytes left after full triples
  const size_t full_length = length - tail;  // bytes covered by full triples

  std::string encoded;

  // Each full group of three bytes becomes four output characters.
  for (size_t pos = 0; pos < full_length; pos += 3) {
    const unsigned int b0 = bytes[pos];
    const unsigned int b1 = bytes[pos + 1];
    const unsigned int b2 = bytes[pos + 2];
    encoded += base64_chars[b0 >> 2];
    encoded += base64_chars[((b0 & 0x3) << 4) | (b1 >> 4)];
    encoded += base64_chars[((b1 & 0xf) << 2) | (b2 >> 6)];
    encoded += base64_chars[b2 & 0x3f];
  }

  // A trailing group of one or two bytes is emitted with '=' padding.
  if (tail == 2) {
    const unsigned int b0 = bytes[full_length];
    const unsigned int b1 = bytes[full_length + 1];
    encoded += base64_chars[b0 >> 2];
    encoded += base64_chars[((b0 & 0x3) << 4) | (b1 >> 4)];
    encoded += base64_chars[(b1 & 0xf) << 2];
    encoded += '=';
  } else if (tail == 1) {
    const unsigned int b0 = bytes[full_length];
    encoded += base64_chars[b0 >> 2];
    encoded += base64_chars[(b0 & 0x3) << 4];
    encoded += '=';
    encoded += '=';
  }
  return encoded;
}
444
FileDescriptorToBase64(const FileDescriptor * descriptor)445 std::string FileDescriptorToBase64(const FileDescriptor* descriptor) {
446 std::string fdp_bytes;
447 FileDescriptorProto fdp;
448 descriptor->CopyTo(&fdp);
449 fdp.SerializeToString(&fdp_bytes);
450 return StringToBase64(fdp_bytes);
451 }
452
CreateFieldGenerator(const FieldDescriptor * descriptor,int fieldOrdinal,const Options * options)453 FieldGeneratorBase* CreateFieldGenerator(const FieldDescriptor* descriptor,
454 int fieldOrdinal,
455 const Options* options) {
456 switch (descriptor->type()) {
457 case FieldDescriptor::TYPE_GROUP:
458 case FieldDescriptor::TYPE_MESSAGE:
459 if (descriptor->is_repeated()) {
460 if (descriptor->is_map()) {
461 return new MapFieldGenerator(descriptor, fieldOrdinal, options);
462 } else {
463 return new RepeatedMessageFieldGenerator(descriptor, fieldOrdinal, options);
464 }
465 } else {
466 if (IsWrapperType(descriptor)) {
467 if (descriptor->containing_oneof()) {
468 return new WrapperOneofFieldGenerator(descriptor, fieldOrdinal, options);
469 } else {
470 return new WrapperFieldGenerator(descriptor, fieldOrdinal, options);
471 }
472 } else {
473 if (descriptor->containing_oneof()) {
474 return new MessageOneofFieldGenerator(descriptor, fieldOrdinal, options);
475 } else {
476 return new MessageFieldGenerator(descriptor, fieldOrdinal, options);
477 }
478 }
479 }
480 case FieldDescriptor::TYPE_ENUM:
481 if (descriptor->is_repeated()) {
482 return new RepeatedEnumFieldGenerator(descriptor, fieldOrdinal, options);
483 } else {
484 if (descriptor->containing_oneof()) {
485 return new EnumOneofFieldGenerator(descriptor, fieldOrdinal, options);
486 } else {
487 return new EnumFieldGenerator(descriptor, fieldOrdinal, options);
488 }
489 }
490 default:
491 if (descriptor->is_repeated()) {
492 return new RepeatedPrimitiveFieldGenerator(descriptor, fieldOrdinal, options);
493 } else {
494 if (descriptor->containing_oneof()) {
495 return new PrimitiveOneofFieldGenerator(descriptor, fieldOrdinal, options);
496 } else {
497 return new PrimitiveFieldGenerator(descriptor, fieldOrdinal, options);
498 }
499 }
500 }
501 }
502
503 } // namespace csharp
504 } // namespace compiler
505 } // namespace protobuf
506 } // namespace google
507