• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #ifndef _MSC_VER
32 #include <unistd.h>
33 #endif
34 #include <climits>
35 #include <errno.h>
36 #include <fcntl.h>
37 #include <fstream>
38 #include <iostream>
39 #include <sstream>
40 #include <stdlib.h>
41 #include <unordered_set>
42 #include <vector>
43 
44 #include <google/protobuf/compiler/objectivec/objectivec_helpers.h>
45 #include <google/protobuf/compiler/objectivec/objectivec_nsobject_methods.h>
46 #include <google/protobuf/descriptor.pb.h>
47 #include <google/protobuf/io/coded_stream.h>
48 #include <google/protobuf/io/printer.h>
49 #include <google/protobuf/io/zero_copy_stream_impl.h>
50 #include <google/protobuf/io/io_win32.h>
51 #include <google/protobuf/port.h>
52 #include <google/protobuf/stubs/common.h>
53 #include <google/protobuf/stubs/strutil.h>
54 
55 // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
56 // error cases, so it seems to be ok to use as a back door for errors.
57 
58 namespace google {
59 namespace protobuf {
60 namespace compiler {
61 namespace objectivec {
62 
63 // <io.h> is transitively included in this file. Import the functions explicitly
64 // in this posix namespace to avoid ambiguous definitions of open().
65 namespace posix {
66 #ifdef _WIN32
67 using ::google::protobuf::io::win32::open;
68 #else
69 using ::open;
70 #endif
71 }  // namespace posix
72 
Options()73 Options::Options() {
74   // Default is the value of the env for the package prefixes.
75   const char* file_path = getenv("GPB_OBJC_EXPECTED_PACKAGE_PREFIXES");
76   if (file_path) {
77     expected_prefixes_path = file_path;
78   }
79   const char* suppressions = getenv("GPB_OBJC_EXPECTED_PACKAGE_PREFIXES_SUPPRESSIONS");
80   if (suppressions) {
81     SplitStringUsing(suppressions, ";", &expected_prefixes_suppressions);
82   }
83 }
84 
85 namespace {
86 
// Returns a set holding the first |num_words| entries of |words|; duplicate
// entries collapse into a single element.
std::unordered_set<string> MakeWordsMap(const char* const words[], size_t num_words) {
  std::unordered_set<string> word_set;
  const char* const* const words_end = words + num_words;
  for (const char* const* word = words; word != words_end; ++word) {
    word_set.insert(*word);
  }
  return word_set;
}
94 
95 const char* const kUpperSegmentsList[] = {"url", "http", "https"};
96 
97 std::unordered_set<string> kUpperSegments =
98     MakeWordsMap(kUpperSegmentsList, GOOGLE_ARRAYSIZE(kUpperSegmentsList));
99 
// Returns true when |c| is a line feed or a carriage return.
bool ascii_isnewline(char c) {
  switch (c) {
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
103 
104 // Internal helper for name handing.
105 // Do not expose this outside of helpers, stick to having functions for specific
106 // cases (ClassName(), FieldName()), so there is always consistent suffix rules.
UnderscoresToCamelCase(const string & input,bool first_capitalized)107 string UnderscoresToCamelCase(const string& input, bool first_capitalized) {
108   std::vector<string> values;
109   string current;
110 
111   bool last_char_was_number = false;
112   bool last_char_was_lower = false;
113   bool last_char_was_upper = false;
114   for (int i = 0; i < input.size(); i++) {
115     char c = input[i];
116     if (ascii_isdigit(c)) {
117       if (!last_char_was_number) {
118         values.push_back(current);
119         current = "";
120       }
121       current += c;
122       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
123       last_char_was_number = true;
124     } else if (ascii_islower(c)) {
125       // lowercase letter can follow a lowercase or uppercase letter
126       if (!last_char_was_lower && !last_char_was_upper) {
127         values.push_back(current);
128         current = "";
129       }
130       current += c;  // already lower
131       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
132       last_char_was_lower = true;
133     } else if (ascii_isupper(c)) {
134       if (!last_char_was_upper) {
135         values.push_back(current);
136         current = "";
137       }
138       current += ascii_tolower(c);
139       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
140       last_char_was_upper = true;
141     } else {
142       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
143     }
144   }
145   values.push_back(current);
146 
147   string result;
148   bool first_segment_forces_upper = false;
149   for (std::vector<string>::iterator i = values.begin(); i != values.end(); ++i) {
150     string value = *i;
151     bool all_upper = (kUpperSegments.count(value) > 0);
152     if (all_upper && (result.length() == 0)) {
153       first_segment_forces_upper = true;
154     }
155     for (int j = 0; j < value.length(); j++) {
156       if (j == 0 || all_upper) {
157         value[j] = ascii_toupper(value[j]);
158       } else {
159         // Nothing, already in lower.
160       }
161     }
162     result += value;
163   }
164   if ((result.length() != 0) &&
165       !first_capitalized &&
166       !first_segment_forces_upper) {
167     result[0] = ascii_tolower(result[0]);
168   }
169   return result;
170 }
171 
// Names that must not be emitted verbatim in generated Objective-C code.
// SanitizeNameForObjC() looks candidates up in this list by exact string
// match, so entries must be spelled exactly (no stray whitespace).
const char* const kReservedWordList[] = {
  // Note NSObject Methods:
  // These are brought in from objectivec_nsobject_methods.h that is generated
  // using method_dump.sh. See kNSObjectMethods below.

  // Objective C "keywords" that aren't in C
  // From
  // http://stackoverflow.com/questions/1873630/reserved-keywords-in-objective-c
  // with some others added on.
  "id", "_cmd", "super", "in", "out", "inout", "bycopy", "byref", "oneway",
  "self", "instancetype", "nullable", "nonnull", "nil", "Nil",
  "YES", "NO", "weak",

  // C/C++ keywords (Incl C++11)
  // From http://en.cppreference.com/w/cpp/keywords
  "and", "and_eq", "alignas", "alignof", "asm", "auto", "bitand", "bitor",
  "bool", "break", "case", "catch", "char", "char16_t", "char32_t", "class",
  "compl", "const", "constexpr", "const_cast", "continue", "decltype",
  "default", "delete", "double", "dynamic_cast", "else", "enum", "explicit",
  "export", "extern", "false", "float", "for", "friend", "goto", "if",
  "inline", "int", "long", "mutable", "namespace", "new", "noexcept", "not",
  "not_eq", "nullptr", "operator", "or", "or_eq", "private", "protected",
  "public", "register", "reinterpret_cast", "return", "short", "signed",
  "sizeof", "static", "static_assert", "static_cast", "struct", "switch",
  "template", "this", "thread_local", "throw", "true", "try", "typedef",
  "typeid", "typename", "union", "unsigned", "using", "virtual", "void",
  "volatile", "wchar_t", "while", "xor", "xor_eq",

  // C99 keywords
  // From
  // http://publib.boulder.ibm.com/infocenter/lnxpcomp/v8v101/index.jsp?topic=%2Fcom.ibm.xlcpp8l.doc%2Flanguage%2Fref%2Fkeyw.htm
  "restrict",

  // GCC/Clang extension
  "typeof",

  // Not a keyword, but will break you
  "NULL",

  // Objective-C Runtime typedefs
  // From <objc/runtime.h>
  "Category", "Ivar", "Method", "Protocol",

  // GPBMessage Methods
  // Only need to add instance methods that may conflict with
  // method declared in protos. The main cases are methods
  // that take no arguments, or setFoo:/hasFoo: type methods.
  "clear", "data", "delimitedData", "descriptor", "extensionRegistry",
  "extensionsCurrentlySet", "initialized", "isInitialized", "serializedSize",
  "sortedExtensionsInUse", "unknownFields",

  // MacTypes.h names
  "Fixed", "Fract", "Size", "LogicalAddress", "PhysicalAddress", "ByteCount",
  "ByteOffset", "Duration", "AbsoluteTime", "OptionBits", "ItemCount",
  "PBVersion", "ScriptCode", "LangCode", "RegionCode", "OSType",
  "ProcessSerialNumber", "Point", "Rect", "FixedPoint", "FixedRect", "Style",
  "StyleParameter", "StyleField", "TimeScale", "TimeBase", "TimeRecord",
};
230 
// Returns true if |input| starts with "__" or "_[A-Z]", which are reserved
// identifiers in C/C++. All calls should go through UnderscoresToCamelCase
// before getting here, but this verifies and allows for future expansion if we
// decide to redefine what a reserved C identifier is (for example the GNU list
// https://www.gnu.org/software/libc/manual/html_node/Reserved-Names.html ).
//
// Two-character names such as "__" and "_A" are themselves reserved, so the
// check applies to any input of at least two characters.
bool IsReservedCIdentifier(const string& input) {
  if (input.length() < 2 || input[0] != '_') {
    return false;
  }
  // The <ctype.h> classifiers require a value representable as unsigned char;
  // passing a negative plain char is undefined behaviour.
  const unsigned char second = static_cast<unsigned char>(input[1]);
  return second == '_' || isupper(second) != 0;
}
246 
// Produces the Objective-C name for |input|.
//
// |prefix| is prepended unless |input| already starts with it AND the
// character right after the prefix is an uppercase letter. If the resulting
// name is a reserved C identifier, a reserved word or an NSObject method name,
// |extension| is appended. When |out_suffix_added| is non-NULL it receives the
// appended suffix, or is cleared when nothing was appended.
string SanitizeNameForObjC(const string& prefix,
                           const string& input,
                           const string& extension,
                           string* out_suffix_added) {
  static const std::unordered_set<string> kReservedWords =
      MakeWordsMap(kReservedWordList, GOOGLE_ARRAYSIZE(kReservedWordList));
  static const std::unordered_set<string> kNSObjectMethods =
      MakeWordsMap(kNSObjectMethodsList, GOOGLE_ARRAYSIZE(kNSObjectMethodsList));

  // The input counts as "already prefixed" only when it is longer than the
  // prefix and the prefix is followed by an uppercase letter.
  const bool already_prefixed =
      HasPrefixString(input, prefix) &&
      input.length() != prefix.length() &&
      ascii_isupper(input[prefix.length()]);
  const string candidate = already_prefixed ? input : prefix + input;

  const bool needs_suffix = IsReservedCIdentifier(candidate) ||
                            (kReservedWords.count(candidate) > 0) ||
                            (kNSObjectMethods.count(candidate) > 0);
  if (!needs_suffix) {
    if (out_suffix_added) out_suffix_added->clear();
    return candidate;
  }

  if (out_suffix_added) *out_suffix_added = extension;
  return candidate + extension;
}
279 
NameFromFieldDescriptor(const FieldDescriptor * field)280 string NameFromFieldDescriptor(const FieldDescriptor* field) {
281   if (field->type() == FieldDescriptor::TYPE_GROUP) {
282     return field->message_type()->name();
283   } else {
284     return field->name();
285   }
286 }
287 
// Splits |path| at its last '/'.
// |directory| (if non-null) receives everything before that slash, or the
// empty string when there is no slash. |basename| (if non-null) receives
// everything after it, or the whole path when there is no slash.
void PathSplit(const string& path, string* directory, string* basename) {
  const string::size_type slash_pos = path.rfind('/');
  const bool has_slash = (slash_pos != string::npos);

  if (directory) {
    *directory = has_slash ? path.substr(0, slash_pos) : string();
  }
  if (basename) {
    *basename = has_slash ? path.substr(slash_pos + 1) : path;
  }
}
306 
IsSpecialName(const string & name,const string * special_names,size_t count)307 bool IsSpecialName(const string& name, const string* special_names,
308                    size_t count) {
309   for (size_t i = 0; i < count; ++i) {
310     size_t length = special_names[i].length();
311     if (name.compare(0, length, special_names[i]) == 0) {
312       if (name.length() > length) {
313         // If name is longer than the retained_name[i] that it matches
314         // the next character must be not lower case (newton vs newTon vs
315         // new_ton).
316         return !ascii_islower(name[length]);
317       } else {
318         return true;
319       }
320     }
321   }
322   return false;
323 }
324 
GetZeroEnumNameForFlagType(const FlagType flag_type)325 string GetZeroEnumNameForFlagType(const FlagType flag_type) {
326   switch(flag_type) {
327     case FLAGTYPE_DESCRIPTOR_INITIALIZATION:
328       return "GPBDescriptorInitializationFlag_None";
329     case FLAGTYPE_EXTENSION:
330       return "GPBExtensionNone";
331     case FLAGTYPE_FIELD:
332       return "GPBFieldNone";
333     default:
334       GOOGLE_LOG(FATAL) << "Can't get here.";
335       return "0";
336   }
337 }
338 
GetEnumNameForFlagType(const FlagType flag_type)339 string GetEnumNameForFlagType(const FlagType flag_type) {
340   switch(flag_type) {
341     case FLAGTYPE_DESCRIPTOR_INITIALIZATION:
342       return "GPBDescriptorInitializationFlags";
343     case FLAGTYPE_EXTENSION:
344       return "GPBExtensionOptions";
345     case FLAGTYPE_FIELD:
346       return "GPBFieldFlags";
347     default:
348       GOOGLE_LOG(FATAL) << "Can't get here.";
349       return string();
350   }
351 }
352 
353 }  // namespace
354 
355 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const string & to_escape)356 string EscapeTrigraphs(const string& to_escape) {
357   return StringReplace(to_escape, "?", "\\?", true);
358 }
359 
// Removes a trailing ".protodevel" from |filename| if present, otherwise a
// trailing ".proto".
string StripProto(const string& filename) {
  const char* const suffix =
      HasSuffixString(filename, ".protodevel") ? ".protodevel" : ".proto";
  return StripSuffixString(filename, suffix);
}
367 
TrimWhitespace(StringPiece * input)368 void TrimWhitespace(StringPiece* input) {
369   while (!input->empty() && ascii_isspace(*input->data())) {
370     input->remove_prefix(1);
371   }
372   while (!input->empty() && ascii_isspace((*input)[input->length() - 1])) {
373     input->remove_suffix(1);
374   }
375 }
376 
377 
IsRetainedName(const string & name)378 bool IsRetainedName(const string& name) {
379   // List of prefixes from
380   // http://developer.apple.com/library/mac/#documentation/Cocoa/Conceptual/MemoryMgmt/Articles/mmRules.html
381   static const string retained_names[] = {"new", "alloc", "copy",
382                                           "mutableCopy"};
383   return IsSpecialName(name, retained_names,
384                        sizeof(retained_names) / sizeof(retained_names[0]));
385 }
386 
IsInitName(const string & name)387 bool IsInitName(const string& name) {
388   static const string init_names[] = {"init"};
389   return IsSpecialName(name, init_names,
390                        sizeof(init_names) / sizeof(init_names[0]));
391 }
392 
BaseFileName(const FileDescriptor * file)393 string BaseFileName(const FileDescriptor* file) {
394   string basename;
395   PathSplit(file->name(), NULL, &basename);
396   return basename;
397 }
398 
FileClassPrefix(const FileDescriptor * file)399 string FileClassPrefix(const FileDescriptor* file) {
400   // Default is empty string, no need to check has_objc_class_prefix.
401   string result = file->options().objc_class_prefix();
402   return result;
403 }
404 
FilePath(const FileDescriptor * file)405 string FilePath(const FileDescriptor* file) {
406   string output;
407   string basename;
408   string directory;
409   PathSplit(file->name(), &directory, &basename);
410   if (directory.length() > 0) {
411     output = directory + "/";
412   }
413   basename = StripProto(basename);
414 
415   // CamelCase to be more ObjC friendly.
416   basename = UnderscoresToCamelCase(basename, true);
417 
418   output += basename;
419   return output;
420 }
421 
FilePathBasename(const FileDescriptor * file)422 string FilePathBasename(const FileDescriptor* file) {
423   string output;
424   string basename;
425   string directory;
426   PathSplit(file->name(), &directory, &basename);
427   basename = StripProto(basename);
428 
429   // CamelCase to be more ObjC friendly.
430   output = UnderscoresToCamelCase(basename, true);
431 
432   return output;
433 }
434 
FileClassName(const FileDescriptor * file)435 string FileClassName(const FileDescriptor* file) {
436   const string prefix = FileClassPrefix(file);
437   const string name = UnderscoresToCamelCase(StripProto(BaseFileName(file)), true) + "Root";
438   // There aren't really any reserved words that end in "Root", but playing
439   // it safe and checking.
440   return SanitizeNameForObjC(prefix, name, "_RootClass", NULL);
441 }
442 
ClassNameWorker(const Descriptor * descriptor)443 string ClassNameWorker(const Descriptor* descriptor) {
444   string name;
445   if (descriptor->containing_type() != NULL) {
446     name = ClassNameWorker(descriptor->containing_type());
447     name += "_";
448   }
449   return name + descriptor->name();
450 }
451 
ClassNameWorker(const EnumDescriptor * descriptor)452 string ClassNameWorker(const EnumDescriptor* descriptor) {
453   string name;
454   if (descriptor->containing_type() != NULL) {
455     name = ClassNameWorker(descriptor->containing_type());
456     name += "_";
457   }
458   return name + descriptor->name();
459 }
460 
ClassName(const Descriptor * descriptor)461 string ClassName(const Descriptor* descriptor) {
462   return ClassName(descriptor, NULL);
463 }
464 
ClassName(const Descriptor * descriptor,string * out_suffix_added)465 string ClassName(const Descriptor* descriptor, string* out_suffix_added) {
466   // 1. Message names are used as is (style calls for CamelCase, trust it).
467   // 2. Check for reserved word at the very end and then suffix things.
468   const string prefix = FileClassPrefix(descriptor->file());
469   const string name = ClassNameWorker(descriptor);
470   return SanitizeNameForObjC(prefix, name, "_Class", out_suffix_added);
471 }
472 
EnumName(const EnumDescriptor * descriptor)473 string EnumName(const EnumDescriptor* descriptor) {
474   // 1. Enum names are used as is (style calls for CamelCase, trust it).
475   // 2. Check for reserved word at the every end and then suffix things.
476   //      message Fixed {
477   //        message Size {...}
478   //        enum Mumble {...}
479   //      ...
480   //      }
481   //    yields Fixed_Class, Fixed_Size.
482   const string prefix = FileClassPrefix(descriptor->file());
483   const string name = ClassNameWorker(descriptor);
484   return SanitizeNameForObjC(prefix, name, "_Enum", NULL);
485 }
486 
EnumValueName(const EnumValueDescriptor * descriptor)487 string EnumValueName(const EnumValueDescriptor* descriptor) {
488   // Because of the Switch enum compatibility, the name on the enum has to have
489   // the suffix handing, so it slightly diverges from how nested classes work.
490   //   enum Fixed {
491   //     FOO = 1
492   //   }
493   // yields Fixed_Enum and Fixed_Enum_Foo (not Fixed_Foo).
494   const string class_name = EnumName(descriptor->type());
495   const string value_str = UnderscoresToCamelCase(descriptor->name(), true);
496   const string name = class_name + "_" + value_str;
497   // There aren't really any reserved words with an underscore and a leading
498   // capital letter, but playing it safe and checking.
499   return SanitizeNameForObjC("", name, "_Value", NULL);
500 }
501 
EnumValueShortName(const EnumValueDescriptor * descriptor)502 string EnumValueShortName(const EnumValueDescriptor* descriptor) {
503   // Enum value names (EnumValueName above) are the enum name turned into
504   // a class name and then the value name is CamelCased and concatenated; the
505   // whole thing then gets sanitized for reserved words.
506   // The "short name" is intended to be the final leaf, the value name; but
507   // you can't simply send that off to sanitize as that could result in it
508   // getting modified when the full name didn't.  For example enum
509   // "StorageModes" has a value "retain".  So the full name is
510   // "StorageModes_Retain", but if we sanitize "retain" it would become
511   // "RetainValue".
512   // So the right way to get the short name is to take the full enum name
513   // and then strip off the enum name (leaving the value name and anything
514   // done by sanitize).
515   const string class_name = EnumName(descriptor->type());
516   const string long_name_prefix = class_name + "_";
517   const string long_name = EnumValueName(descriptor);
518   return StripPrefixString(long_name, long_name_prefix);
519 }
520 
// Converts a CamelCase name to upper case with underscores: an underscore is
// inserted before every uppercase letter except the first character, and all
// characters are upper-cased.
string UnCamelCaseEnumShortName(const string& name) {
  string upper_snake;
  for (string::const_iterator it = name.begin(); it != name.end(); ++it) {
    if (it != name.begin() && ascii_isupper(*it)) {
      upper_snake += '_';
    }
    upper_snake += ascii_toupper(*it);
  }
  return upper_snake;
}
532 
ExtensionMethodName(const FieldDescriptor * descriptor)533 string ExtensionMethodName(const FieldDescriptor* descriptor) {
534   const string name = NameFromFieldDescriptor(descriptor);
535   const string result = UnderscoresToCamelCase(name, false);
536   return SanitizeNameForObjC("", result, "_Extension", NULL);
537 }
538 
FieldName(const FieldDescriptor * field)539 string FieldName(const FieldDescriptor* field) {
540   const string name = NameFromFieldDescriptor(field);
541   string result = UnderscoresToCamelCase(name, false);
542   if (field->is_repeated() && !field->is_map()) {
543     // Add "Array" before do check for reserved worlds.
544     result += "Array";
545   } else {
546     // If it wasn't repeated, but ends in "Array", force on the _p suffix.
547     if (HasSuffixString(result, "Array")) {
548       result += "_p";
549     }
550   }
551   return SanitizeNameForObjC("", result, "_p", NULL);
552 }
553 
FieldNameCapitalized(const FieldDescriptor * field)554 string FieldNameCapitalized(const FieldDescriptor* field) {
555   // Want the same suffix handling, so upcase the first letter of the other
556   // name.
557   string result = FieldName(field);
558   if (result.length() > 0) {
559     result[0] = ascii_toupper(result[0]);
560   }
561   return result;
562 }
563 
OneofEnumName(const OneofDescriptor * descriptor)564 string OneofEnumName(const OneofDescriptor* descriptor) {
565   const Descriptor* fieldDescriptor = descriptor->containing_type();
566   string name = ClassName(fieldDescriptor);
567   name += "_" + UnderscoresToCamelCase(descriptor->name(), true) + "_OneOfCase";
568   // No sanitize needed because the OS never has names that end in _OneOfCase.
569   return name;
570 }
571 
OneofName(const OneofDescriptor * descriptor)572 string OneofName(const OneofDescriptor* descriptor) {
573   string name = UnderscoresToCamelCase(descriptor->name(), false);
574   // No sanitize needed because it gets OneOfCase added and that shouldn't
575   // ever conflict.
576   return name;
577 }
578 
OneofNameCapitalized(const OneofDescriptor * descriptor)579 string OneofNameCapitalized(const OneofDescriptor* descriptor) {
580   // Use the common handling and then up-case the first letter.
581   string result = OneofName(descriptor);
582   if (result.length() > 0) {
583     result[0] = ascii_toupper(result[0]);
584   }
585   return result;
586 }
587 
// Wraps |class_name| in a GPBObjCClass(...) expression.
string ObjCClass(const string& class_name) {
  string expression("GPBObjCClass(");
  expression += class_name;
  expression += ")";
  return expression;
}
591 
// Wraps |class_name| in a GPBObjCClassDeclaration(...); statement.
string ObjCClassDeclaration(const string& class_name) {
  string declaration("GPBObjCClassDeclaration(");
  declaration += class_name;
  declaration += ");";
  return declaration;
}
595 
// Undoes the name decoration on a generated field name.
// A trailing "_p" is removed, then a trailing "Array" for repeated fields.
// For group fields the remainder is returned with its first letter
// upper-cased; for all other fields every uppercase letter is lower-cased and
// preceded by '_' (except at the very start).
string UnCamelCaseFieldName(const string& name, const FieldDescriptor* field) {
  // StripSuffixString returns its input unchanged when the suffix is absent.
  string base = StripSuffixString(name, "_p");
  if (field->is_repeated()) {
    base = StripSuffixString(base, "Array");
  }

  if (field->type() == FieldDescriptor::TYPE_GROUP) {
    if (!base.empty() && ascii_islower(base[0])) {
      base[0] = ascii_toupper(base[0]);
    }
    return base;
  }

  string snake;
  for (string::size_type pos = 0; pos < base.size(); ++pos) {
    const char c = base[pos];
    if (!ascii_isupper(c)) {
      snake += c;
      continue;
    }
    if (pos != 0) {
      snake += '_';
    }
    snake += ascii_tolower(c);
  }
  return snake;
}
627 
GetCapitalizedType(const FieldDescriptor * field)628 string GetCapitalizedType(const FieldDescriptor* field) {
629   switch (field->type()) {
630     case FieldDescriptor::TYPE_INT32:
631       return "Int32";
632     case FieldDescriptor::TYPE_UINT32:
633       return "UInt32";
634     case FieldDescriptor::TYPE_SINT32:
635       return "SInt32";
636     case FieldDescriptor::TYPE_FIXED32:
637       return "Fixed32";
638     case FieldDescriptor::TYPE_SFIXED32:
639       return "SFixed32";
640     case FieldDescriptor::TYPE_INT64:
641       return "Int64";
642     case FieldDescriptor::TYPE_UINT64:
643       return "UInt64";
644     case FieldDescriptor::TYPE_SINT64:
645       return "SInt64";
646     case FieldDescriptor::TYPE_FIXED64:
647       return "Fixed64";
648     case FieldDescriptor::TYPE_SFIXED64:
649       return "SFixed64";
650     case FieldDescriptor::TYPE_FLOAT:
651       return "Float";
652     case FieldDescriptor::TYPE_DOUBLE:
653       return "Double";
654     case FieldDescriptor::TYPE_BOOL:
655       return "Bool";
656     case FieldDescriptor::TYPE_STRING:
657       return "String";
658     case FieldDescriptor::TYPE_BYTES:
659       return "Bytes";
660     case FieldDescriptor::TYPE_ENUM:
661       return "Enum";
662     case FieldDescriptor::TYPE_GROUP:
663       return "Group";
664     case FieldDescriptor::TYPE_MESSAGE:
665       return "Message";
666   }
667 
668   // Some compilers report reaching end of function even though all cases of
669   // the enum are handed in the switch.
670   GOOGLE_LOG(FATAL) << "Can't get here.";
671   return string();
672 }
673 
GetObjectiveCType(FieldDescriptor::Type field_type)674 ObjectiveCType GetObjectiveCType(FieldDescriptor::Type field_type) {
675   switch (field_type) {
676     case FieldDescriptor::TYPE_INT32:
677     case FieldDescriptor::TYPE_SINT32:
678     case FieldDescriptor::TYPE_SFIXED32:
679       return OBJECTIVECTYPE_INT32;
680 
681     case FieldDescriptor::TYPE_UINT32:
682     case FieldDescriptor::TYPE_FIXED32:
683       return OBJECTIVECTYPE_UINT32;
684 
685     case FieldDescriptor::TYPE_INT64:
686     case FieldDescriptor::TYPE_SINT64:
687     case FieldDescriptor::TYPE_SFIXED64:
688       return OBJECTIVECTYPE_INT64;
689 
690     case FieldDescriptor::TYPE_UINT64:
691     case FieldDescriptor::TYPE_FIXED64:
692       return OBJECTIVECTYPE_UINT64;
693 
694     case FieldDescriptor::TYPE_FLOAT:
695       return OBJECTIVECTYPE_FLOAT;
696 
697     case FieldDescriptor::TYPE_DOUBLE:
698       return OBJECTIVECTYPE_DOUBLE;
699 
700     case FieldDescriptor::TYPE_BOOL:
701       return OBJECTIVECTYPE_BOOLEAN;
702 
703     case FieldDescriptor::TYPE_STRING:
704       return OBJECTIVECTYPE_STRING;
705 
706     case FieldDescriptor::TYPE_BYTES:
707       return OBJECTIVECTYPE_DATA;
708 
709     case FieldDescriptor::TYPE_ENUM:
710       return OBJECTIVECTYPE_ENUM;
711 
712     case FieldDescriptor::TYPE_GROUP:
713     case FieldDescriptor::TYPE_MESSAGE:
714       return OBJECTIVECTYPE_MESSAGE;
715   }
716 
717   // Some compilers report reaching end of function even though all cases of
718   // the enum are handed in the switch.
719   GOOGLE_LOG(FATAL) << "Can't get here.";
720   return OBJECTIVECTYPE_INT32;
721 }
722 
IsPrimitiveType(const FieldDescriptor * field)723 bool IsPrimitiveType(const FieldDescriptor* field) {
724   ObjectiveCType type = GetObjectiveCType(field);
725   switch (type) {
726     case OBJECTIVECTYPE_INT32:
727     case OBJECTIVECTYPE_UINT32:
728     case OBJECTIVECTYPE_INT64:
729     case OBJECTIVECTYPE_UINT64:
730     case OBJECTIVECTYPE_FLOAT:
731     case OBJECTIVECTYPE_DOUBLE:
732     case OBJECTIVECTYPE_BOOLEAN:
733     case OBJECTIVECTYPE_ENUM:
734       return true;
735       break;
736     default:
737       return false;
738   }
739 }
740 
IsReferenceType(const FieldDescriptor * field)741 bool IsReferenceType(const FieldDescriptor* field) {
742   return !IsPrimitiveType(field);
743 }
744 
// Converts the textual form of a floating point default into the text to emit.
// "nan", "inf" and "-inf" become NAN, INFINITY and -INFINITY. Any other value
// is returned as is, with "f" appended when |add_float_suffix| is set and the
// text contains '.', 'e' or 'E'.
static string HandleExtremeFloatingPoint(string val, bool add_float_suffix) {
  if (val == "nan") return "NAN";
  if (val == "inf") return "INFINITY";
  if (val == "-inf") return "-INFINITY";

  // Float strings with ., e or E need to have f appended.
  const bool looks_floating = (val.find_first_of(".eE") != string::npos);
  if (add_float_suffix && looks_floating) {
    val += "f";
  }
  return val;
}
762 
GPBGenericValueFieldName(const FieldDescriptor * field)763 string GPBGenericValueFieldName(const FieldDescriptor* field) {
764   // Returns the field within the GPBGenericValue union to use for the given
765   // field.
766   if (field->is_repeated()) {
767       return "valueMessage";
768   }
769   switch (field->cpp_type()) {
770     case FieldDescriptor::CPPTYPE_INT32:
771       return "valueInt32";
772     case FieldDescriptor::CPPTYPE_UINT32:
773       return "valueUInt32";
774     case FieldDescriptor::CPPTYPE_INT64:
775       return "valueInt64";
776     case FieldDescriptor::CPPTYPE_UINT64:
777       return "valueUInt64";
778     case FieldDescriptor::CPPTYPE_FLOAT:
779       return "valueFloat";
780     case FieldDescriptor::CPPTYPE_DOUBLE:
781       return "valueDouble";
782     case FieldDescriptor::CPPTYPE_BOOL:
783       return "valueBool";
784     case FieldDescriptor::CPPTYPE_STRING:
785       if (field->type() == FieldDescriptor::TYPE_BYTES) {
786         return "valueData";
787       } else {
788         return "valueString";
789       }
790     case FieldDescriptor::CPPTYPE_ENUM:
791       return "valueEnum";
792     case FieldDescriptor::CPPTYPE_MESSAGE:
793       return "valueMessage";
794   }
795 
796   // Some compilers report reaching end of function even though all cases of
797   // the enum are handed in the switch.
798   GOOGLE_LOG(FATAL) << "Can't get here.";
799   return string();
800 }
801 
802 
DefaultValue(const FieldDescriptor * field)803 string DefaultValue(const FieldDescriptor* field) {
804   // Repeated fields don't have defaults.
805   if (field->is_repeated()) {
806     return "nil";
807   }
808 
809   // Switch on cpp_type since we need to know which default_value_* method
810   // of FieldDescriptor to call.
811   switch (field->cpp_type()) {
812     case FieldDescriptor::CPPTYPE_INT32:
813       // gcc and llvm reject the decimal form of kint32min and kint64min.
814       if (field->default_value_int32() == INT_MIN) {
815         return "-0x80000000";
816       }
817       return StrCat(field->default_value_int32());
818     case FieldDescriptor::CPPTYPE_UINT32:
819       return StrCat(field->default_value_uint32()) + "U";
820     case FieldDescriptor::CPPTYPE_INT64:
821       // gcc and llvm reject the decimal form of kint32min and kint64min.
822       if (field->default_value_int64() == LLONG_MIN) {
823         return "-0x8000000000000000LL";
824       }
825       return StrCat(field->default_value_int64()) + "LL";
826     case FieldDescriptor::CPPTYPE_UINT64:
827       return StrCat(field->default_value_uint64()) + "ULL";
828     case FieldDescriptor::CPPTYPE_DOUBLE:
829       return HandleExtremeFloatingPoint(
830           SimpleDtoa(field->default_value_double()), false);
831     case FieldDescriptor::CPPTYPE_FLOAT:
832       return HandleExtremeFloatingPoint(
833           SimpleFtoa(field->default_value_float()), true);
834     case FieldDescriptor::CPPTYPE_BOOL:
835       return field->default_value_bool() ? "YES" : "NO";
836     case FieldDescriptor::CPPTYPE_STRING: {
837       const bool has_default_value = field->has_default_value();
838       const string& default_string = field->default_value_string();
839       if (!has_default_value || default_string.length() == 0) {
840         // If the field is defined as being the empty string,
841         // then we will just assign to nil, as the empty string is the
842         // default for both strings and data.
843         return "nil";
844       }
845       if (field->type() == FieldDescriptor::TYPE_BYTES) {
846         // We want constant fields in our data structures so we can
847         // declare them as static. To achieve this we cheat and stuff
848         // a escaped c string (prefixed with a length) into the data
849         // field, and cast it to an (NSData*) so it will compile.
850         // The runtime library knows how to handle it.
851 
852         // Must convert to a standard byte order for packing length into
853         // a cstring.
854         uint32 length = ghtonl(default_string.length());
855         string bytes((const char*)&length, sizeof(length));
856         bytes.append(default_string);
857         return "(NSData*)\"" + EscapeTrigraphs(CEscape(bytes)) + "\"";
858       } else {
859         return "@\"" + EscapeTrigraphs(CEscape(default_string)) + "\"";
860       }
861     }
862     case FieldDescriptor::CPPTYPE_ENUM:
863       return EnumValueName(field->default_value_enum());
864     case FieldDescriptor::CPPTYPE_MESSAGE:
865       return "nil";
866   }
867 
868   // Some compilers report reaching end of function even though all cases of
869   // the enum are handed in the switch.
870   GOOGLE_LOG(FATAL) << "Can't get here.";
871   return string();
872 }
873 
HasNonZeroDefaultValue(const FieldDescriptor * field)874 bool HasNonZeroDefaultValue(const FieldDescriptor* field) {
875   // Repeated fields don't have defaults.
876   if (field->is_repeated()) {
877     return false;
878   }
879 
880   // As much as checking field->has_default_value() seems useful, it isn't
881   // because of enums. proto2 syntax allows the first item in an enum (the
882   // default) to be non zero. So checking field->has_default_value() would
883   // result in missing this non zero default.  See MessageWithOneBasedEnum in
884   // objectivec/Tests/unittest_objc.proto for a test Message to confirm this.
885 
886   // Some proto file set the default to the zero value, so make sure the value
887   // isn't the zero case.
888   switch (field->cpp_type()) {
889     case FieldDescriptor::CPPTYPE_INT32:
890       return field->default_value_int32() != 0;
891     case FieldDescriptor::CPPTYPE_UINT32:
892       return field->default_value_uint32() != 0U;
893     case FieldDescriptor::CPPTYPE_INT64:
894       return field->default_value_int64() != 0LL;
895     case FieldDescriptor::CPPTYPE_UINT64:
896       return field->default_value_uint64() != 0ULL;
897     case FieldDescriptor::CPPTYPE_DOUBLE:
898       return field->default_value_double() != 0.0;
899     case FieldDescriptor::CPPTYPE_FLOAT:
900       return field->default_value_float() != 0.0f;
901     case FieldDescriptor::CPPTYPE_BOOL:
902       return field->default_value_bool();
903     case FieldDescriptor::CPPTYPE_STRING: {
904       const string& default_string = field->default_value_string();
905       return default_string.length() != 0;
906     }
907     case FieldDescriptor::CPPTYPE_ENUM:
908       return field->default_value_enum()->number() != 0;
909     case FieldDescriptor::CPPTYPE_MESSAGE:
910       return false;
911   }
912 
913   // Some compilers report reaching end of function even though all cases of
914   // the enum are handed in the switch.
915   GOOGLE_LOG(FATAL) << "Can't get here.";
916   return false;
917 }
918 
BuildFlagsString(const FlagType flag_type,const std::vector<string> & strings)919 string BuildFlagsString(const FlagType flag_type,
920                         const std::vector<string>& strings) {
921   if (strings.empty()) {
922     return GetZeroEnumNameForFlagType(flag_type);
923   } else if (strings.size() == 1) {
924     return strings[0];
925   }
926   string string("(" + GetEnumNameForFlagType(flag_type) + ")(");
927   for (size_t i = 0; i != strings.size(); ++i) {
928     if (i > 0) {
929       string.append(" | ");
930     }
931     string.append(strings[i]);
932   }
933   string.append(")");
934   return string;
935 }
936 
BuildCommentsString(const SourceLocation & location,bool prefer_single_line)937 string BuildCommentsString(const SourceLocation& location,
938                            bool prefer_single_line) {
939   const string& comments = location.leading_comments.empty()
940                                ? location.trailing_comments
941                                : location.leading_comments;
942   std::vector<string> lines;
943   lines = Split(comments, "\n", false);
944   while (!lines.empty() && lines.back().empty()) {
945     lines.pop_back();
946   }
947   // If there are no comments, just return an empty string.
948   if (lines.empty()) {
949     return "";
950   }
951 
952   string prefix;
953   string suffix;
954   string final_comments;
955   string epilogue;
956 
957   bool add_leading_space = false;
958 
959   if (prefer_single_line && lines.size() == 1) {
960     prefix = "/** ";
961     suffix = " */\n";
962   } else {
963     prefix = "* ";
964     suffix = "\n";
965     final_comments += "/**\n";
966     epilogue = " **/\n";
967     add_leading_space = true;
968   }
969 
970   for (int i = 0; i < lines.size(); i++) {
971     string line = StripPrefixString(lines[i], " ");
972     // HeaderDoc and appledoc use '\' and '@' for markers; escape them.
973     line = StringReplace(line, "\\", "\\\\", true);
974     line = StringReplace(line, "@", "\\@", true);
975     // Decouple / from * to not have inline comments inside comments.
976     line = StringReplace(line, "/*", "/\\*", true);
977     line = StringReplace(line, "*/", "*\\/", true);
978     line = prefix + line;
979     StripWhitespace(&line);
980     // If not a one line, need to add the first space before *, as
981     // StripWhitespace would have removed it.
982     line = (add_leading_space ? " " : "") + line;
983     final_comments += line + suffix;
984   }
985   final_comments += epilogue;
986   return final_comments;
987 }
988 
// Whether this should be a generator option, for folks that don't use
// CocoaPods but do want to put the library in a framework, is an interesting
// question. The problem is that it would mean changing sources shipped with
// the library to actually use a different value, so it isn't as simple as an
// option.
const char* const ProtobufLibraryFrameworkName = "Protobuf";
994 
// Returns the symbol GPB_USE_[framework_name]_FRAMEWORK_IMPORTS, with the
// framework name passed through ToUpper().
string ProtobufFrameworkImportSymbol(const string& framework_name) {
  return "GPB_USE_" + ToUpper(framework_name) + "_FRAMEWORK_IMPORTS";
}
1002 
IsProtobufLibraryBundledProtoFile(const FileDescriptor * file)1003 bool IsProtobufLibraryBundledProtoFile(const FileDescriptor* file) {
1004   // We don't check the name prefix or proto package because some files
1005   // (descriptor.proto), aren't shipped generated by the library, so this
1006   // seems to be the safest way to only catch the ones shipped.
1007   const string name = file->name();
1008   if (name == "google/protobuf/any.proto" ||
1009       name == "google/protobuf/api.proto" ||
1010       name == "google/protobuf/duration.proto" ||
1011       name == "google/protobuf/empty.proto" ||
1012       name == "google/protobuf/field_mask.proto" ||
1013       name == "google/protobuf/source_context.proto" ||
1014       name == "google/protobuf/struct.proto" ||
1015       name == "google/protobuf/timestamp.proto" ||
1016       name == "google/protobuf/type.proto" ||
1017       name == "google/protobuf/wrappers.proto") {
1018     return true;
1019   }
1020   return false;
1021 }
1022 
ReadLine(StringPiece * input,StringPiece * line)1023 bool ReadLine(StringPiece* input, StringPiece* line) {
1024   for (int len = 0; len < input->size(); ++len) {
1025     if (ascii_isnewline((*input)[len])) {
1026       *line = StringPiece(input->data(), len);
1027       ++len;  // advance over the newline
1028       *input = StringPiece(input->data() + len, input->size() - len);
1029       return true;
1030     }
1031   }
1032   return false;  // Ran out of input with no newline.
1033 }
1034 
RemoveComment(StringPiece * input)1035 void RemoveComment(StringPiece* input) {
1036   int offset = input->find('#');
1037   if (offset != StringPiece::npos) {
1038     input->remove_suffix(input->length() - offset);
1039   }
1040 }
1041 
1042 namespace {
1043 
1044 class ExpectedPrefixesCollector : public LineConsumer {
1045  public:
ExpectedPrefixesCollector(std::map<string,string> * inout_package_to_prefix_map)1046   ExpectedPrefixesCollector(std::map<string, string>* inout_package_to_prefix_map)
1047       : prefix_map_(inout_package_to_prefix_map) {}
1048 
1049   virtual bool ConsumeLine(const StringPiece& line, string* out_error);
1050 
1051  private:
1052   std::map<string, string>* prefix_map_;
1053 };
1054 
ConsumeLine(const StringPiece & line,string * out_error)1055 bool ExpectedPrefixesCollector::ConsumeLine(
1056     const StringPiece& line, string* out_error) {
1057   int offset = line.find('=');
1058   if (offset == StringPiece::npos) {
1059     *out_error = string("Expected prefixes file line without equal sign: '") +
1060                  string(line) + "'.";
1061     return false;
1062   }
1063   StringPiece package = line.substr(0, offset);
1064   StringPiece prefix = line.substr(offset + 1);
1065   TrimWhitespace(&package);
1066   TrimWhitespace(&prefix);
1067   // Don't really worry about error checking the package/prefix for
1068   // being valid.  Assume the file is validated when it is created/edited.
1069   (*prefix_map_)[string(package)] = string(prefix);
1070   return true;
1071 }
1072 
LoadExpectedPackagePrefixes(const Options & generation_options,std::map<string,string> * prefix_map,string * out_error)1073 bool LoadExpectedPackagePrefixes(const Options &generation_options,
1074                                  std::map<string, string>* prefix_map,
1075                                  string* out_error) {
1076   if (generation_options.expected_prefixes_path.empty()) {
1077     return true;
1078   }
1079 
1080   ExpectedPrefixesCollector collector(prefix_map);
1081   return ParseSimpleFile(
1082       generation_options.expected_prefixes_path, &collector, out_error);
1083 }
1084 
ValidateObjCClassPrefix(const FileDescriptor * file,const string & expected_prefixes_path,const std::map<string,string> & expected_package_prefixes,string * out_error)1085 bool ValidateObjCClassPrefix(
1086     const FileDescriptor* file,
1087     const string& expected_prefixes_path,
1088     const std::map<string, string>& expected_package_prefixes,
1089     string* out_error) {
1090   const string prefix = file->options().objc_class_prefix();
1091   const string package = file->package();
1092 
1093   // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
1094   // error cases, so it seems to be ok to use as a back door for warnings.
1095 
1096   // Check: Error - See if there was an expected prefix for the package and
1097   // report if it doesn't match (wrong or missing).
1098   std::map<string, string>::const_iterator package_match =
1099       expected_package_prefixes.find(package);
1100   if (package_match != expected_package_prefixes.end()) {
1101     // There was an entry, and...
1102     if (package_match->second == prefix) {
1103       // ...it matches.  All good, out of here!
1104       return true;
1105     } else {
1106       // ...it didn't match!
1107       *out_error = "error: Expected 'option objc_class_prefix = \"" +
1108                    package_match->second + "\";' for package '" + package +
1109                    "' in '" + file->name() + "'";
1110       if (prefix.length()) {
1111         *out_error += "; but found '" + prefix + "' instead";
1112       }
1113       *out_error += ".";
1114       return false;
1115     }
1116   }
1117 
1118   // If there was no prefix option, we're done at this point.
1119   if (prefix.empty()) {
1120     // No prefix, nothing left to check.
1121     return true;
1122   }
1123 
1124   // Check: Warning - Make sure the prefix is is a reasonable value according
1125   // to Apple's rules (the checks above implicitly whitelist anything that
1126   // doesn't meet these rules).
1127   if (!ascii_isupper(prefix[0])) {
1128     std::cerr << std::endl
1129          << "protoc:0: warning: Invalid 'option objc_class_prefix = \""
1130          << prefix << "\";' in '" << file->name() << "';"
1131          << " it should start with a capital letter." << std::endl;
1132     std::cerr.flush();
1133   }
1134   if (prefix.length() < 3) {
1135     // Apple reserves 2 character prefixes for themselves. They do use some
1136     // 3 character prefixes, but they haven't updated the rules/docs.
1137     std::cerr << std::endl
1138          << "protoc:0: warning: Invalid 'option objc_class_prefix = \""
1139          << prefix << "\";' in '" << file->name() << "';"
1140          << " Apple recommends they should be at least 3 characters long."
1141          << std::endl;
1142     std::cerr.flush();
1143   }
1144 
1145   // Look for any other package that uses the same prefix.
1146   string other_package_for_prefix;
1147   for (std::map<string, string>::const_iterator i = expected_package_prefixes.begin();
1148        i != expected_package_prefixes.end(); ++i) {
1149     if (i->second == prefix) {
1150       other_package_for_prefix = i->first;
1151       break;
1152     }
1153   }
1154 
1155   // Check: Warning - If the file does not have a package, check whether
1156   // the prefix declared is being used by another package or not.
1157   if (package.empty()) {
1158     // The file does not have a package and ...
1159     if (other_package_for_prefix.empty()) {
1160       // ... no other package has declared that prefix.
1161       std::cerr << std::endl
1162            << "protoc:0: warning: File '" << file->name() << "' has no "
1163            << "package. Consider adding a new package to the proto and adding '"
1164            << "new.package = " << prefix << "' to the expected prefixes file ("
1165            << expected_prefixes_path << ")." << std::endl;
1166       std::cerr.flush();
1167     } else {
1168       // ... another package has declared the same prefix.
1169       std::cerr << std::endl
1170            << "protoc:0: warning: File '" << file->name() << "' has no package "
1171            << "and package '" << other_package_for_prefix << "' already uses '"
1172            << prefix << "' as its prefix. Consider either adding a new package "
1173            << "to the proto, or reusing one of the packages already using this "
1174            << "prefix in the expected prefixes file ("
1175            << expected_prefixes_path << ")." << std::endl;
1176       std::cerr.flush();
1177     }
1178     return true;
1179   }
1180 
1181   // Check: Error - Make sure the prefix wasn't expected for a different
1182   // package (overlap is allowed, but it has to be listed as an expected
1183   // overlap).
1184   if (!other_package_for_prefix.empty()) {
1185     *out_error =
1186         "error: Found 'option objc_class_prefix = \"" + prefix +
1187         "\";' in '" + file->name() +
1188         "'; that prefix is already used for 'package " +
1189         other_package_for_prefix + ";'. It can only be reused by listing " +
1190         "it in the expected file (" +
1191         expected_prefixes_path + ").";
1192     return false;  // Only report first usage of the prefix.
1193   }
1194 
1195   // Check: Warning - If the given package/prefix pair wasn't expected, issue a
1196   // warning issue a warning suggesting it gets added to the file.
1197   if (!expected_package_prefixes.empty()) {
1198     std::cerr << std::endl
1199          << "protoc:0: warning: Found unexpected 'option objc_class_prefix = \""
1200          << prefix << "\";' in '" << file->name() << "';"
1201          << " consider adding it to the expected prefixes file ("
1202          << expected_prefixes_path << ")." << std::endl;
1203     std::cerr.flush();
1204   }
1205 
1206   return true;
1207 }
1208 
1209 }  // namespace
1210 
ValidateObjCClassPrefixes(const std::vector<const FileDescriptor * > & files,const Options & generation_options,string * out_error)1211 bool ValidateObjCClassPrefixes(const std::vector<const FileDescriptor*>& files,
1212                                const Options& generation_options,
1213                                string* out_error) {
1214   // Load the expected package prefixes, if available, to validate against.
1215   std::map<string, string> expected_package_prefixes;
1216   if (!LoadExpectedPackagePrefixes(generation_options,
1217                                    &expected_package_prefixes,
1218                                    out_error)) {
1219     return false;
1220   }
1221 
1222   for (int i = 0; i < files.size(); i++) {
1223     bool should_skip =
1224       (std::find(generation_options.expected_prefixes_suppressions.begin(),
1225                  generation_options.expected_prefixes_suppressions.end(),
1226                  files[i]->name())
1227           != generation_options.expected_prefixes_suppressions.end());
1228     if (should_skip) {
1229       continue;
1230     }
1231 
1232     bool is_valid =
1233         ValidateObjCClassPrefix(files[i],
1234                                 generation_options.expected_prefixes_path,
1235                                 expected_package_prefixes,
1236                                 out_error);
1237     if (!is_valid) {
1238       return false;
1239     }
1240   }
1241   return true;
1242 }
1243 
TextFormatDecodeData()1244 TextFormatDecodeData::TextFormatDecodeData() { }
1245 
~TextFormatDecodeData()1246 TextFormatDecodeData::~TextFormatDecodeData() { }
1247 
AddString(int32 key,const string & input_for_decode,const string & desired_output)1248 void TextFormatDecodeData::AddString(int32 key,
1249                                      const string& input_for_decode,
1250                                      const string& desired_output) {
1251   for (std::vector<DataEntry>::const_iterator i = entries_.begin();
1252        i != entries_.end(); ++i) {
1253     if (i->first == key) {
1254       std::cerr << "error: duplicate key (" << key
1255            << ") making TextFormat data, input: \"" << input_for_decode
1256            << "\", desired: \"" << desired_output << "\"." << std::endl;
1257       std::cerr.flush();
1258       abort();
1259     }
1260   }
1261 
1262   const string& data = TextFormatDecodeData::DecodeDataForString(
1263       input_for_decode, desired_output);
1264   entries_.push_back(DataEntry(key, data));
1265 }
1266 
Data() const1267 string TextFormatDecodeData::Data() const {
1268   std::ostringstream data_stringstream;
1269 
1270   if (num_entries() > 0) {
1271     io::OstreamOutputStream data_outputstream(&data_stringstream);
1272     io::CodedOutputStream output_stream(&data_outputstream);
1273 
1274     output_stream.WriteVarint32(num_entries());
1275     for (std::vector<DataEntry>::const_iterator i = entries_.begin();
1276          i != entries_.end(); ++i) {
1277       output_stream.WriteVarint32(i->first);
1278       output_stream.WriteString(i->second);
1279     }
1280   }
1281 
1282   data_stringstream.flush();
1283   return data_stringstream.str();
1284 }
1285 
1286 namespace {
1287 
1288 // Helper to build up the decode data for a string.
1289 class DecodeDataBuilder {
1290  public:
DecodeDataBuilder()1291   DecodeDataBuilder() { Reset(); }
1292 
1293   bool AddCharacter(const char desired, const char input);
AddUnderscore()1294   void AddUnderscore() {
1295     Push();
1296     need_underscore_ = true;
1297   }
Finish()1298   string Finish() {
1299     Push();
1300     return decode_data_;
1301   }
1302 
1303  private:
1304   static constexpr uint8 kAddUnderscore = 0x80;
1305 
1306   static constexpr uint8 kOpAsIs = 0x00;
1307   static constexpr uint8 kOpFirstUpper = 0x40;
1308   static constexpr uint8 kOpFirstLower = 0x20;
1309   static constexpr uint8 kOpAllUpper = 0x60;
1310 
1311   static constexpr int kMaxSegmentLen = 0x1f;
1312 
AddChar(const char desired)1313   void AddChar(const char desired) {
1314     ++segment_len_;
1315     is_all_upper_ &= ascii_isupper(desired);
1316   }
1317 
Push()1318   void Push() {
1319     uint8 op = (op_ | segment_len_);
1320     if (need_underscore_) op |= kAddUnderscore;
1321     if (op != 0) {
1322       decode_data_ += (char)op;
1323     }
1324     Reset();
1325   }
1326 
AddFirst(const char desired,const char input)1327   bool AddFirst(const char desired, const char input) {
1328     if (desired == input) {
1329       op_ = kOpAsIs;
1330     } else if (desired == ascii_toupper(input)) {
1331       op_ = kOpFirstUpper;
1332     } else if (desired == ascii_tolower(input)) {
1333       op_ = kOpFirstLower;
1334     } else {
1335       // Can't be transformed to match.
1336       return false;
1337     }
1338     AddChar(desired);
1339     return true;
1340   }
1341 
Reset()1342   void Reset() {
1343     need_underscore_ = false;
1344     op_ = 0;
1345     segment_len_ = 0;
1346     is_all_upper_ = true;
1347   }
1348 
1349   bool need_underscore_;
1350   bool is_all_upper_;
1351   uint8 op_;
1352   int segment_len_;
1353 
1354   string decode_data_;
1355 };
1356 
AddCharacter(const char desired,const char input)1357 bool DecodeDataBuilder::AddCharacter(const char desired, const char input) {
1358   // If we've hit the max size, push to start a new segment.
1359   if (segment_len_ == kMaxSegmentLen) {
1360     Push();
1361   }
1362   if (segment_len_ == 0) {
1363     return AddFirst(desired, input);
1364   }
1365 
1366   // Desired and input match...
1367   if (desired == input) {
1368     // If we aren't transforming it, or we're upper casing it and it is
1369     // supposed to be uppercase; just add it to the segment.
1370     if ((op_ != kOpAllUpper) || ascii_isupper(desired)) {
1371       AddChar(desired);
1372       return true;
1373     }
1374 
1375     // Add the current segment, and start the next one.
1376     Push();
1377     return AddFirst(desired, input);
1378   }
1379 
1380   // If we need to uppercase, and everything so far has been uppercase,
1381   // promote op to AllUpper.
1382   if ((desired == ascii_toupper(input)) && is_all_upper_) {
1383     op_ = kOpAllUpper;
1384     AddChar(desired);
1385     return true;
1386   }
1387 
1388   // Give up, push and start a new segment.
1389   Push();
1390   return AddFirst(desired, input);
1391 }
1392 
// Fallback used when decode data can't be generated: the raw string itself,
// with a NUL marker in front (meaning "full string") and a NUL terminator
// behind it.
string DirectDecodeString(const string& str) {
  string result(1, '\0');  // Marker for full string.
  result.append(str);
  result.push_back('\0');  // End of string.
  return result;
}
1402 
1403 }  // namespace
1404 
1405 // static
DecodeDataForString(const string & input_for_decode,const string & desired_output)1406 string TextFormatDecodeData::DecodeDataForString(const string& input_for_decode,
1407                                                  const string& desired_output) {
1408   if (input_for_decode.empty() || desired_output.empty()) {
1409     std::cerr << "error: got empty string for making TextFormat data, input: \""
1410          << input_for_decode << "\", desired: \"" << desired_output << "\"."
1411          << std::endl;
1412     std::cerr.flush();
1413     abort();
1414   }
1415   if ((input_for_decode.find('\0') != string::npos) ||
1416       (desired_output.find('\0') != string::npos)) {
1417     std::cerr << "error: got a null char in a string for making TextFormat data,"
1418          << " input: \"" << CEscape(input_for_decode) << "\", desired: \""
1419          << CEscape(desired_output) << "\"." << std::endl;
1420     std::cerr.flush();
1421     abort();
1422   }
1423 
1424   DecodeDataBuilder builder;
1425 
1426   // Walk the output building it from the input.
1427   int x = 0;
1428   for (int y = 0; y < desired_output.size(); y++) {
1429     const char d = desired_output[y];
1430     if (d == '_') {
1431       builder.AddUnderscore();
1432       continue;
1433     }
1434 
1435     if (x >= input_for_decode.size()) {
1436       // Out of input, no way to encode it, just return a full decode.
1437       return DirectDecodeString(desired_output);
1438     }
1439     if (builder.AddCharacter(d, input_for_decode[x])) {
1440       ++x;  // Consumed one input
1441     } else {
1442       // Couldn't transform for the next character, just return a full decode.
1443       return DirectDecodeString(desired_output);
1444     }
1445   }
1446 
1447   if (x != input_for_decode.size()) {
1448     // Extra input (suffix from name sanitizing?), just return a full decode.
1449     return DirectDecodeString(desired_output);
1450   }
1451 
1452   // Add the end marker.
1453   return builder.Finish() + (char)'\0';
1454 }
1455 
1456 namespace {
1457 
1458 class Parser {
1459  public:
Parser(LineConsumer * line_consumer)1460   Parser(LineConsumer* line_consumer)
1461       : line_consumer_(line_consumer), line_(0) {}
1462 
1463   // Parses a check of input, returning success/failure.
1464   bool ParseChunk(StringPiece chunk);
1465 
1466   // Should be called to finish parsing (after all input has been provided via
1467   // ParseChunk()).  Returns success/failure.
1468   bool Finish();
1469 
last_line() const1470   int last_line() const { return line_; }
error_str() const1471   string error_str() const { return error_str_; }
1472 
1473  private:
1474   bool ParseLoop();
1475 
1476   LineConsumer* line_consumer_;
1477   int line_;
1478   string error_str_;
1479   StringPiece p_;
1480   string leftover_;
1481 };
1482 
ParseChunk(StringPiece chunk)1483 bool Parser::ParseChunk(StringPiece chunk) {
1484   if (!leftover_.empty()) {
1485     leftover_ += string(chunk);
1486     p_ = StringPiece(leftover_);
1487   } else {
1488     p_ = chunk;
1489   }
1490   bool result = ParseLoop();
1491   if (p_.empty()) {
1492     leftover_.clear();
1493   } else {
1494     leftover_ = string(p_);
1495   }
1496   return result;
1497 }
1498 
Finish()1499 bool Parser::Finish() {
1500   if (leftover_.empty()) {
1501     return true;
1502   }
1503   // Force a newline onto the end to finish parsing.
1504   leftover_ += "\n";
1505   p_ = StringPiece(leftover_);
1506   if (!ParseLoop()) {
1507     return false;
1508   }
1509   return p_.empty();  // Everything used?
1510 }
1511 
ParseLoop()1512 bool Parser::ParseLoop() {
1513   StringPiece line;
1514   while (ReadLine(&p_, &line)) {
1515     ++line_;
1516     RemoveComment(&line);
1517     TrimWhitespace(&line);
1518     if (line.empty()) {
1519       continue;  // Blank line.
1520     }
1521     if (!line_consumer_->ConsumeLine(line, &error_str_)) {
1522       return false;
1523     }
1524   }
1525   return true;
1526 }
1527 
1528 }  // namespace
1529 
LineConsumer()1530 LineConsumer::LineConsumer() {}
1531 
~LineConsumer()1532 LineConsumer::~LineConsumer() {}
1533 
ParseSimpleFile(const string & path,LineConsumer * line_consumer,string * out_error)1534 bool ParseSimpleFile(
1535     const string& path, LineConsumer* line_consumer, string* out_error) {
1536   int fd;
1537   do {
1538     fd = posix::open(path.c_str(), O_RDONLY);
1539   } while (fd < 0 && errno == EINTR);
1540   if (fd < 0) {
1541     *out_error =
1542         string("error: Unable to open \"") + path + "\", " + strerror(errno);
1543     return false;
1544   }
1545   io::FileInputStream file_stream(fd);
1546   file_stream.SetCloseOnDelete(true);
1547 
1548   Parser parser(line_consumer);
1549   const void* buf;
1550   int buf_len;
1551   while (file_stream.Next(&buf, &buf_len)) {
1552     if (buf_len == 0) {
1553       continue;
1554     }
1555 
1556     if (!parser.ParseChunk(StringPiece(static_cast<const char*>(buf), buf_len))) {
1557       *out_error =
1558           string("error: ") + path +
1559           " Line " + StrCat(parser.last_line()) + ", " + parser.error_str();
1560       return false;
1561     }
1562   }
1563   return parser.Finish();
1564 }
1565 
ImportWriter(const string & generate_for_named_framework,const string & named_framework_to_proto_path_mappings_path,const string & runtime_import_prefix,bool include_wkt_imports)1566 ImportWriter::ImportWriter(
1567   const string& generate_for_named_framework,
1568   const string& named_framework_to_proto_path_mappings_path,
1569   const string& runtime_import_prefix,
1570   bool include_wkt_imports)
1571     : generate_for_named_framework_(generate_for_named_framework),
1572       named_framework_to_proto_path_mappings_path_(
1573           named_framework_to_proto_path_mappings_path),
1574       runtime_import_prefix_(runtime_import_prefix),
1575       include_wkt_imports_(include_wkt_imports),
1576       need_to_parse_mapping_file_(true) {
1577 }
1578 
~ImportWriter()1579 ImportWriter::~ImportWriter() {}
1580 
AddFile(const FileDescriptor * file,const string & header_extension)1581 void ImportWriter::AddFile(const FileDescriptor* file,
1582                            const string& header_extension) {
1583   if (IsProtobufLibraryBundledProtoFile(file)) {
1584     // The imports of the WKTs are only needed within the library itself,
1585     // in other cases, they get skipped because the generated code already
1586     // import GPBProtocolBuffers.h and hence proves them.
1587     if (include_wkt_imports_) {
1588       const string header_name =
1589         "GPB" + FilePathBasename(file) + header_extension;
1590       protobuf_imports_.push_back(header_name);
1591     }
1592     return;
1593   }
1594 
1595   // Lazy parse any mappings.
1596   if (need_to_parse_mapping_file_) {
1597     ParseFrameworkMappings();
1598   }
1599 
1600   std::map<string, string>::iterator proto_lookup =
1601       proto_file_to_framework_name_.find(file->name());
1602   if (proto_lookup != proto_file_to_framework_name_.end()) {
1603     other_framework_imports_.push_back(
1604         proto_lookup->second + "/" +
1605         FilePathBasename(file) + header_extension);
1606     return;
1607   }
1608 
1609   if (!generate_for_named_framework_.empty()) {
1610     other_framework_imports_.push_back(
1611         generate_for_named_framework_ + "/" +
1612         FilePathBasename(file) + header_extension);
1613     return;
1614   }
1615 
1616   other_imports_.push_back(FilePath(file) + header_extension);
1617 }
1618 
Print(io::Printer * printer) const1619 void ImportWriter::Print(io::Printer* printer) const {
1620   bool add_blank_line = false;
1621 
1622   if (!protobuf_imports_.empty()) {
1623     PrintRuntimeImports(printer, protobuf_imports_, runtime_import_prefix_);
1624     add_blank_line = true;
1625   }
1626 
1627   if (!other_framework_imports_.empty()) {
1628     if (add_blank_line) {
1629       printer->Print("\n");
1630     }
1631 
1632     for (std::vector<string>::const_iterator iter = other_framework_imports_.begin();
1633          iter != other_framework_imports_.end(); ++iter) {
1634       printer->Print(
1635           "#import <$header$>\n",
1636           "header", *iter);
1637     }
1638 
1639     add_blank_line = true;
1640   }
1641 
1642   if (!other_imports_.empty()) {
1643     if (add_blank_line) {
1644       printer->Print("\n");
1645     }
1646 
1647     for (std::vector<string>::const_iterator iter = other_imports_.begin();
1648          iter != other_imports_.end(); ++iter) {
1649       printer->Print(
1650           "#import \"$header$\"\n",
1651           "header", *iter);
1652     }
1653   }
1654 }
1655 
PrintRuntimeImports(io::Printer * printer,const std::vector<string> & header_to_import,const string & runtime_import_prefix,bool default_cpp_symbol)1656 void ImportWriter::PrintRuntimeImports(
1657     io::Printer* printer,
1658     const std::vector<string>& header_to_import,
1659     const string& runtime_import_prefix,
1660     bool default_cpp_symbol) {
1661 
1662   // Given an override, use that.
1663   if (!runtime_import_prefix.empty()) {
1664     for (const auto& header : header_to_import) {
1665       printer->Print(
1666           " #import \"$import_prefix$/$header$\"\n",
1667           "import_prefix", runtime_import_prefix,
1668           "header", header);
1669     }
1670     return;
1671   }
1672 
1673   const string framework_name(ProtobufLibraryFrameworkName);
1674   const string cpp_symbol(ProtobufFrameworkImportSymbol(framework_name));
1675 
1676   if (default_cpp_symbol) {
1677     printer->Print(
1678         "// This CPP symbol can be defined to use imports that match up to the framework\n"
1679         "// imports needed when using CocoaPods.\n"
1680         "#if !defined($cpp_symbol$)\n"
1681         " #define $cpp_symbol$ 0\n"
1682         "#endif\n"
1683         "\n",
1684         "cpp_symbol", cpp_symbol);
1685   }
1686 
1687   printer->Print(
1688       "#if $cpp_symbol$\n",
1689       "cpp_symbol", cpp_symbol);
1690   for (const auto& header : header_to_import) {
1691     printer->Print(
1692         " #import <$framework_name$/$header$>\n",
1693         "framework_name", framework_name,
1694         "header", header);
1695   }
1696   printer->Print(
1697       "#else\n");
1698   for (const auto& header : header_to_import) {
1699     printer->Print(
1700         " #import \"$header$\"\n",
1701         "header", header);
1702   }
1703   printer->Print(
1704       "#endif\n");
1705 }
1706 
ParseFrameworkMappings()1707 void ImportWriter::ParseFrameworkMappings() {
1708   need_to_parse_mapping_file_ = false;
1709   if (named_framework_to_proto_path_mappings_path_.empty()) {
1710     return;  // Nothing to do.
1711   }
1712 
1713   ProtoFrameworkCollector collector(&proto_file_to_framework_name_);
1714   string parse_error;
1715   if (!ParseSimpleFile(named_framework_to_proto_path_mappings_path_,
1716                        &collector, &parse_error)) {
1717     std::cerr << "error parsing " << named_framework_to_proto_path_mappings_path_
1718          << " : " << parse_error << std::endl;
1719     std::cerr.flush();
1720   }
1721 }
1722 
ConsumeLine(const StringPiece & line,string * out_error)1723 bool ImportWriter::ProtoFrameworkCollector::ConsumeLine(
1724     const StringPiece& line, string* out_error) {
1725   int offset = line.find(':');
1726   if (offset == StringPiece::npos) {
1727     *out_error =
1728         string("Framework/proto file mapping line without colon sign: '") +
1729         string(line) + "'.";
1730     return false;
1731   }
1732   StringPiece framework_name = line.substr(0, offset);
1733   StringPiece proto_file_list = line.substr(offset + 1);
1734   TrimWhitespace(&framework_name);
1735 
1736   int start = 0;
1737   while (start < proto_file_list.length()) {
1738     offset = proto_file_list.find(',', start);
1739     if (offset == StringPiece::npos) {
1740       offset = proto_file_list.length();
1741     }
1742 
1743     StringPiece proto_file = proto_file_list.substr(start, offset - start);
1744     TrimWhitespace(&proto_file);
1745     if (!proto_file.empty()) {
1746       std::map<string, string>::iterator existing_entry =
1747           map_->find(string(proto_file));
1748       if (existing_entry != map_->end()) {
1749         std::cerr << "warning: duplicate proto file reference, replacing "
1750                      "framework entry for '"
1751                   << string(proto_file) << "' with '" << string(framework_name)
1752                   << "' (was '" << existing_entry->second << "')." << std::endl;
1753         std::cerr.flush();
1754       }
1755 
1756       if (proto_file.find(' ') != StringPiece::npos) {
1757         std::cerr << "note: framework mapping file had a proto file with a "
1758                      "space in, hopefully that isn't a missing comma: '"
1759                   << string(proto_file) << "'" << std::endl;
1760         std::cerr.flush();
1761       }
1762 
1763       (*map_)[string(proto_file)] = string(framework_name);
1764     }
1765 
1766     start = offset + 1;
1767   }
1768 
1769   return true;
1770 }
1771 
1772 
1773 }  // namespace objectivec
1774 }  // namespace compiler
1775 }  // namespace protobuf
1776 }  // namespace google
1777