1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: kenton@google.com (Kenton Varda) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 34 35 #include <algorithm> 36 #include <limits> 37 #include <unordered_set> 38 #include <vector> 39 40 #include <google/protobuf/stubs/stringprintf.h> 41 #include <google/protobuf/compiler/java/java_helpers.h> 42 #include <google/protobuf/compiler/java/java_name_resolver.h> 43 #include <google/protobuf/descriptor.pb.h> 44 #include <google/protobuf/wire_format.h> 45 #include <google/protobuf/stubs/strutil.h> 46 #include <google/protobuf/stubs/substitute.h> 47 48 49 50 #include <google/protobuf/stubs/hash.h> // for hash<T *> 51 52 namespace google { 53 namespace protobuf { 54 namespace compiler { 55 namespace java { 56 57 using internal::WireFormat; 58 using internal::WireFormatLite; 59 60 const char kThickSeparator[] = 61 "// ===================================================================\n"; 62 const char kThinSeparator[] = 63 "// -------------------------------------------------------------------\n"; 64 65 namespace { 66 67 const char* kDefaultPackage = ""; 68 69 // Names that should be avoided as field names. 70 // Using them will cause the compiler to generate accessors whose names are 71 // colliding with methods defined in base classes. 72 const char* kForbiddenWordList[] = { 73 // message base class: 74 "cached_size", 75 "serialized_size", 76 // java.lang.Object: 77 "class", 78 }; 79 80 const std::unordered_set<string>* kReservedNames = 81 new std::unordered_set<string>({ 82 "abstract", "assert", "boolean", "break", "byte", 83 "case", "catch", "char", "class", "const", 84 "continue", "default", "do", "double", "else", 85 "enum", "extends", "final", "finally", "float", 86 "for", "goto", "if", "implements", "import", 87 "instanceof", "int", "interface", "long", "native", 88 "new", "package", "private", "protected", "public", 89 "return", "short", "static", "strictfp", "super", 90 "switch", "synchronized", "this", "throw", "throws", 91 "transient", "try", "void", "volatile", "while", 92 }); 93 IsForbidden(const std::string & field_name)94 bool IsForbidden(const std::string& field_name) { 95 for (int i = 0; i < GOOGLE_ARRAYSIZE(kForbiddenWordList); ++i) { 96 if (field_name == kForbiddenWordList[i]) { 97 return true; 98 } 99 } 100 return false; 101 } 102 FieldName(const FieldDescriptor * field)103 std::string FieldName(const FieldDescriptor* field) { 104 std::string field_name; 105 // Groups are hacky: The name of the field is just the lower-cased name 106 // of the group type. In Java, though, we would like to retain the original 107 // capitalization of the type name. 108 if (GetType(field) == FieldDescriptor::TYPE_GROUP) { 109 field_name = field->message_type()->name(); 110 } else { 111 field_name = field->name(); 112 } 113 if (IsForbidden(field_name)) { 114 // Append a trailing "#" to indicate that the name should be decorated to 115 // avoid collision with other names. 116 field_name += "#"; 117 } 118 return field_name; 119 } 120 121 122 } // namespace 123 PrintGeneratedAnnotation(io::Printer * printer,char delimiter,const std::string & annotation_file)124 void PrintGeneratedAnnotation(io::Printer* printer, char delimiter, 125 const std::string& annotation_file) { 126 if (annotation_file.empty()) { 127 return; 128 } 129 std::string ptemplate = 130 "@javax.annotation.Generated(value=\"protoc\", comments=\"annotations:"; 131 ptemplate.push_back(delimiter); 132 ptemplate.append("annotation_file"); 133 ptemplate.push_back(delimiter); 134 ptemplate.append("\")\n"); 135 printer->Print(ptemplate.c_str(), "annotation_file", annotation_file); 136 } 137 PrintEnumVerifierLogic(io::Printer * printer,const FieldDescriptor * descriptor,const std::map<std::string,std::string> & variables,const char * var_name,const char * terminating_string,bool enforce_lite)138 void PrintEnumVerifierLogic(io::Printer* printer, 139 const FieldDescriptor* descriptor, 140 const std::map<std::string, std::string>& variables, 141 const char* var_name, 142 const char* terminating_string, bool enforce_lite) { 143 std::string enum_verifier_string = 144 enforce_lite ? StrCat(var_name, ".internalGetVerifier()") 145 : StrCat( 146 "new com.google.protobuf.Internal.EnumVerifier() {\n" 147 " @java.lang.Override\n" 148 " public boolean isInRange(int number) {\n" 149 " return ", 150 var_name, 151 ".forNumber(number) != null;\n" 152 " }\n" 153 " }"); 154 printer->Print( 155 variables, 156 StrCat(enum_verifier_string, terminating_string).c_str()); 157 } 158 UnderscoresToCamelCase(const std::string & input,bool cap_next_letter)159 std::string UnderscoresToCamelCase(const std::string& input, 160 bool cap_next_letter) { 161 GOOGLE_CHECK(!input.empty()); 162 std::string result; 163 // Note: I distrust ctype.h due to locales. 164 for (int i = 0; i < input.size(); i++) { 165 if ('a' <= input[i] && input[i] <= 'z') { 166 if (cap_next_letter) { 167 result += input[i] + ('A' - 'a'); 168 } else { 169 result += input[i]; 170 } 171 cap_next_letter = false; 172 } else if ('A' <= input[i] && input[i] <= 'Z') { 173 if (i == 0 && !cap_next_letter) { 174 // Force first letter to lower-case unless explicitly told to 175 // capitalize it. 176 result += input[i] + ('a' - 'A'); 177 } else { 178 // Capital letters after the first are left as-is. 179 result += input[i]; 180 } 181 cap_next_letter = false; 182 } else if ('0' <= input[i] && input[i] <= '9') { 183 result += input[i]; 184 cap_next_letter = true; 185 } else { 186 cap_next_letter = true; 187 } 188 } 189 // Add a trailing "_" if the name should be altered. 190 if (input[input.size() - 1] == '#') { 191 result += '_'; 192 } 193 return result; 194 } 195 UnderscoresToCamelCase(const FieldDescriptor * field)196 std::string UnderscoresToCamelCase(const FieldDescriptor* field) { 197 return UnderscoresToCamelCase(FieldName(field), false); 198 } 199 UnderscoresToCapitalizedCamelCase(const FieldDescriptor * field)200 std::string UnderscoresToCapitalizedCamelCase(const FieldDescriptor* field) { 201 return UnderscoresToCamelCase(FieldName(field), true); 202 } 203 CapitalizedFieldName(const FieldDescriptor * field)204 std::string CapitalizedFieldName(const FieldDescriptor* field) { 205 return UnderscoresToCapitalizedCamelCase(field); 206 } 207 UnderscoresToCamelCase(const MethodDescriptor * method)208 std::string UnderscoresToCamelCase(const MethodDescriptor* method) { 209 return UnderscoresToCamelCase(method->name(), false); 210 } 211 UnderscoresToCamelCaseCheckReserved(const FieldDescriptor * field)212 std::string UnderscoresToCamelCaseCheckReserved(const FieldDescriptor* field) { 213 std::string name = UnderscoresToCamelCase(field); 214 if (kReservedNames->find(name) != kReservedNames->end()) { 215 return name + "_"; 216 } 217 return name; 218 } 219 UniqueFileScopeIdentifier(const Descriptor * descriptor)220 std::string UniqueFileScopeIdentifier(const Descriptor* descriptor) { 221 return "static_" + StringReplace(descriptor->full_name(), ".", "_", true); 222 } 223 CamelCaseFieldName(const FieldDescriptor * field)224 std::string CamelCaseFieldName(const FieldDescriptor* field) { 225 std::string fieldName = UnderscoresToCamelCase(field); 226 if ('0' <= fieldName[0] && fieldName[0] <= '9') { 227 return '_' + fieldName; 228 } 229 return fieldName; 230 } 231 StripProto(const std::string & filename)232 std::string StripProto(const std::string& filename) { 233 if (HasSuffixString(filename, ".protodevel")) { 234 return StripSuffixString(filename, ".protodevel"); 235 } else { 236 return StripSuffixString(filename, ".proto"); 237 } 238 } 239 FileClassName(const FileDescriptor * file,bool immutable)240 std::string FileClassName(const FileDescriptor* file, bool immutable) { 241 ClassNameResolver name_resolver; 242 return name_resolver.GetFileClassName(file, immutable); 243 } 244 FileJavaPackage(const FileDescriptor * file,bool immutable)245 std::string FileJavaPackage(const FileDescriptor* file, bool immutable) { 246 std::string result; 247 248 if (file->options().has_java_package()) { 249 result = file->options().java_package(); 250 } else { 251 result = kDefaultPackage; 252 if (!file->package().empty()) { 253 if (!result.empty()) result += '.'; 254 result += file->package(); 255 } 256 } 257 258 return result; 259 } 260 FileJavaPackage(const FileDescriptor * file)261 std::string FileJavaPackage(const FileDescriptor* file) { 262 return FileJavaPackage(file, true /* immutable */); 263 } 264 JavaPackageToDir(std::string package_name)265 std::string JavaPackageToDir(std::string package_name) { 266 std::string package_dir = StringReplace(package_name, ".", "/", true); 267 if (!package_dir.empty()) package_dir += "/"; 268 return package_dir; 269 } 270 ClassName(const Descriptor * descriptor)271 std::string ClassName(const Descriptor* descriptor) { 272 ClassNameResolver name_resolver; 273 return name_resolver.GetClassName(descriptor, true); 274 } 275 ClassName(const EnumDescriptor * descriptor)276 std::string ClassName(const EnumDescriptor* descriptor) { 277 ClassNameResolver name_resolver; 278 return name_resolver.GetClassName(descriptor, true); 279 } 280 ClassName(const ServiceDescriptor * descriptor)281 std::string ClassName(const ServiceDescriptor* descriptor) { 282 ClassNameResolver name_resolver; 283 return name_resolver.GetClassName(descriptor, true); 284 } 285 ClassName(const FileDescriptor * descriptor)286 std::string ClassName(const FileDescriptor* descriptor) { 287 ClassNameResolver name_resolver; 288 return name_resolver.GetClassName(descriptor, true); 289 } 290 291 ExtraMessageInterfaces(const Descriptor * descriptor)292 std::string ExtraMessageInterfaces(const Descriptor* descriptor) { 293 std::string interfaces = "// @@protoc_insertion_point(message_implements:" + 294 descriptor->full_name() + ")"; 295 return interfaces; 296 } 297 298 ExtraBuilderInterfaces(const Descriptor * descriptor)299 std::string ExtraBuilderInterfaces(const Descriptor* descriptor) { 300 std::string interfaces = "// @@protoc_insertion_point(builder_implements:" + 301 descriptor->full_name() + ")"; 302 return interfaces; 303 } 304 ExtraMessageOrBuilderInterfaces(const Descriptor * descriptor)305 std::string ExtraMessageOrBuilderInterfaces(const Descriptor* descriptor) { 306 std::string interfaces = "// @@protoc_insertion_point(interface_extends:" + 307 descriptor->full_name() + ")"; 308 return interfaces; 309 } 310 FieldConstantName(const FieldDescriptor * field)311 std::string FieldConstantName(const FieldDescriptor* field) { 312 std::string name = field->name() + "_FIELD_NUMBER"; 313 UpperString(&name); 314 return name; 315 } 316 GetType(const FieldDescriptor * field)317 FieldDescriptor::Type GetType(const FieldDescriptor* field) { 318 return field->type(); 319 } 320 GetJavaType(const FieldDescriptor * field)321 JavaType GetJavaType(const FieldDescriptor* field) { 322 switch (GetType(field)) { 323 case FieldDescriptor::TYPE_INT32: 324 case FieldDescriptor::TYPE_UINT32: 325 case FieldDescriptor::TYPE_SINT32: 326 case FieldDescriptor::TYPE_FIXED32: 327 case FieldDescriptor::TYPE_SFIXED32: 328 return JAVATYPE_INT; 329 330 case FieldDescriptor::TYPE_INT64: 331 case FieldDescriptor::TYPE_UINT64: 332 case FieldDescriptor::TYPE_SINT64: 333 case FieldDescriptor::TYPE_FIXED64: 334 case FieldDescriptor::TYPE_SFIXED64: 335 return JAVATYPE_LONG; 336 337 case FieldDescriptor::TYPE_FLOAT: 338 return JAVATYPE_FLOAT; 339 340 case FieldDescriptor::TYPE_DOUBLE: 341 return JAVATYPE_DOUBLE; 342 343 case FieldDescriptor::TYPE_BOOL: 344 return JAVATYPE_BOOLEAN; 345 346 case FieldDescriptor::TYPE_STRING: 347 return JAVATYPE_STRING; 348 349 case FieldDescriptor::TYPE_BYTES: 350 return JAVATYPE_BYTES; 351 352 case FieldDescriptor::TYPE_ENUM: 353 return JAVATYPE_ENUM; 354 355 case FieldDescriptor::TYPE_GROUP: 356 case FieldDescriptor::TYPE_MESSAGE: 357 return JAVATYPE_MESSAGE; 358 359 // No default because we want the compiler to complain if any new 360 // types are added. 361 } 362 363 GOOGLE_LOG(FATAL) << "Can't get here."; 364 return JAVATYPE_INT; 365 } 366 PrimitiveTypeName(JavaType type)367 const char* PrimitiveTypeName(JavaType type) { 368 switch (type) { 369 case JAVATYPE_INT: 370 return "int"; 371 case JAVATYPE_LONG: 372 return "long"; 373 case JAVATYPE_FLOAT: 374 return "float"; 375 case JAVATYPE_DOUBLE: 376 return "double"; 377 case JAVATYPE_BOOLEAN: 378 return "boolean"; 379 case JAVATYPE_STRING: 380 return "java.lang.String"; 381 case JAVATYPE_BYTES: 382 return "com.google.protobuf.ByteString"; 383 case JAVATYPE_ENUM: 384 return NULL; 385 case JAVATYPE_MESSAGE: 386 return NULL; 387 388 // No default because we want the compiler to complain if any new 389 // JavaTypes are added. 390 } 391 392 GOOGLE_LOG(FATAL) << "Can't get here."; 393 return NULL; 394 } 395 PrimitiveTypeName(const FieldDescriptor * descriptor)396 const char* PrimitiveTypeName(const FieldDescriptor* descriptor) { 397 return PrimitiveTypeName(GetJavaType(descriptor)); 398 } 399 BoxedPrimitiveTypeName(JavaType type)400 const char* BoxedPrimitiveTypeName(JavaType type) { 401 switch (type) { 402 case JAVATYPE_INT: 403 return "java.lang.Integer"; 404 case JAVATYPE_LONG: 405 return "java.lang.Long"; 406 case JAVATYPE_FLOAT: 407 return "java.lang.Float"; 408 case JAVATYPE_DOUBLE: 409 return "java.lang.Double"; 410 case JAVATYPE_BOOLEAN: 411 return "java.lang.Boolean"; 412 case JAVATYPE_STRING: 413 return "java.lang.String"; 414 case JAVATYPE_BYTES: 415 return "com.google.protobuf.ByteString"; 416 case JAVATYPE_ENUM: 417 return NULL; 418 case JAVATYPE_MESSAGE: 419 return NULL; 420 421 // No default because we want the compiler to complain if any new 422 // JavaTypes are added. 423 } 424 425 GOOGLE_LOG(FATAL) << "Can't get here."; 426 return NULL; 427 } 428 BoxedPrimitiveTypeName(const FieldDescriptor * descriptor)429 const char* BoxedPrimitiveTypeName(const FieldDescriptor* descriptor) { 430 return BoxedPrimitiveTypeName(GetJavaType(descriptor)); 431 } 432 GetOneofStoredType(const FieldDescriptor * field)433 std::string GetOneofStoredType(const FieldDescriptor* field) { 434 const JavaType javaType = GetJavaType(field); 435 switch (javaType) { 436 case JAVATYPE_ENUM: 437 return "java.lang.Integer"; 438 case JAVATYPE_MESSAGE: 439 return ClassName(field->message_type()); 440 default: 441 return BoxedPrimitiveTypeName(javaType); 442 } 443 } 444 FieldTypeName(FieldDescriptor::Type field_type)445 const char* FieldTypeName(FieldDescriptor::Type field_type) { 446 switch (field_type) { 447 case FieldDescriptor::TYPE_INT32: 448 return "INT32"; 449 case FieldDescriptor::TYPE_UINT32: 450 return "UINT32"; 451 case FieldDescriptor::TYPE_SINT32: 452 return "SINT32"; 453 case FieldDescriptor::TYPE_FIXED32: 454 return "FIXED32"; 455 case FieldDescriptor::TYPE_SFIXED32: 456 return "SFIXED32"; 457 case FieldDescriptor::TYPE_INT64: 458 return "INT64"; 459 case FieldDescriptor::TYPE_UINT64: 460 return "UINT64"; 461 case FieldDescriptor::TYPE_SINT64: 462 return "SINT64"; 463 case FieldDescriptor::TYPE_FIXED64: 464 return "FIXED64"; 465 case FieldDescriptor::TYPE_SFIXED64: 466 return "SFIXED64"; 467 case FieldDescriptor::TYPE_FLOAT: 468 return "FLOAT"; 469 case FieldDescriptor::TYPE_DOUBLE: 470 return "DOUBLE"; 471 case FieldDescriptor::TYPE_BOOL: 472 return "BOOL"; 473 case FieldDescriptor::TYPE_STRING: 474 return "STRING"; 475 case FieldDescriptor::TYPE_BYTES: 476 return "BYTES"; 477 case FieldDescriptor::TYPE_ENUM: 478 return "ENUM"; 479 case FieldDescriptor::TYPE_GROUP: 480 return "GROUP"; 481 case FieldDescriptor::TYPE_MESSAGE: 482 return "MESSAGE"; 483 484 // No default because we want the compiler to complain if any new 485 // types are added. 486 } 487 488 GOOGLE_LOG(FATAL) << "Can't get here."; 489 return NULL; 490 } 491 AllAscii(const std::string & text)492 bool AllAscii(const std::string& text) { 493 for (int i = 0; i < text.size(); i++) { 494 if ((text[i] & 0x80) != 0) { 495 return false; 496 } 497 } 498 return true; 499 } 500 DefaultValue(const FieldDescriptor * field,bool immutable,ClassNameResolver * name_resolver)501 std::string DefaultValue(const FieldDescriptor* field, bool immutable, 502 ClassNameResolver* name_resolver) { 503 // Switch on CppType since we need to know which default_value_* method 504 // of FieldDescriptor to call. 505 switch (field->cpp_type()) { 506 case FieldDescriptor::CPPTYPE_INT32: 507 return StrCat(field->default_value_int32()); 508 case FieldDescriptor::CPPTYPE_UINT32: 509 // Need to print as a signed int since Java has no unsigned. 510 return StrCat(static_cast<int32>(field->default_value_uint32())); 511 case FieldDescriptor::CPPTYPE_INT64: 512 return StrCat(field->default_value_int64()) + "L"; 513 case FieldDescriptor::CPPTYPE_UINT64: 514 return StrCat(static_cast<int64>(field->default_value_uint64())) + 515 "L"; 516 case FieldDescriptor::CPPTYPE_DOUBLE: { 517 double value = field->default_value_double(); 518 if (value == std::numeric_limits<double>::infinity()) { 519 return "Double.POSITIVE_INFINITY"; 520 } else if (value == -std::numeric_limits<double>::infinity()) { 521 return "Double.NEGATIVE_INFINITY"; 522 } else if (value != value) { 523 return "Double.NaN"; 524 } else { 525 return SimpleDtoa(value) + "D"; 526 } 527 } 528 case FieldDescriptor::CPPTYPE_FLOAT: { 529 float value = field->default_value_float(); 530 if (value == std::numeric_limits<float>::infinity()) { 531 return "Float.POSITIVE_INFINITY"; 532 } else if (value == -std::numeric_limits<float>::infinity()) { 533 return "Float.NEGATIVE_INFINITY"; 534 } else if (value != value) { 535 return "Float.NaN"; 536 } else { 537 return SimpleFtoa(value) + "F"; 538 } 539 } 540 case FieldDescriptor::CPPTYPE_BOOL: 541 return field->default_value_bool() ? "true" : "false"; 542 case FieldDescriptor::CPPTYPE_STRING: 543 if (GetType(field) == FieldDescriptor::TYPE_BYTES) { 544 if (field->has_default_value()) { 545 // See comments in Internal.java for gory details. 546 return strings::Substitute( 547 "com.google.protobuf.Internal.bytesDefaultValue(\"$0\")", 548 CEscape(field->default_value_string())); 549 } else { 550 return "com.google.protobuf.ByteString.EMPTY"; 551 } 552 } else { 553 if (AllAscii(field->default_value_string())) { 554 // All chars are ASCII. In this case CEscape() works fine. 555 return "\"" + CEscape(field->default_value_string()) + "\""; 556 } else { 557 // See comments in Internal.java for gory details. 558 return strings::Substitute( 559 "com.google.protobuf.Internal.stringDefaultValue(\"$0\")", 560 CEscape(field->default_value_string())); 561 } 562 } 563 564 case FieldDescriptor::CPPTYPE_ENUM: 565 return name_resolver->GetClassName(field->enum_type(), immutable) + "." + 566 field->default_value_enum()->name(); 567 568 case FieldDescriptor::CPPTYPE_MESSAGE: 569 return name_resolver->GetClassName(field->message_type(), immutable) + 570 ".getDefaultInstance()"; 571 572 // No default because we want the compiler to complain if any new 573 // types are added. 574 } 575 576 GOOGLE_LOG(FATAL) << "Can't get here."; 577 return ""; 578 } 579 IsDefaultValueJavaDefault(const FieldDescriptor * field)580 bool IsDefaultValueJavaDefault(const FieldDescriptor* field) { 581 // Switch on CppType since we need to know which default_value_* method 582 // of FieldDescriptor to call. 583 switch (field->cpp_type()) { 584 case FieldDescriptor::CPPTYPE_INT32: 585 return field->default_value_int32() == 0; 586 case FieldDescriptor::CPPTYPE_UINT32: 587 return field->default_value_uint32() == 0; 588 case FieldDescriptor::CPPTYPE_INT64: 589 return field->default_value_int64() == 0L; 590 case FieldDescriptor::CPPTYPE_UINT64: 591 return field->default_value_uint64() == 0L; 592 case FieldDescriptor::CPPTYPE_DOUBLE: 593 return field->default_value_double() == 0.0; 594 case FieldDescriptor::CPPTYPE_FLOAT: 595 return field->default_value_float() == 0.0; 596 case FieldDescriptor::CPPTYPE_BOOL: 597 return field->default_value_bool() == false; 598 case FieldDescriptor::CPPTYPE_ENUM: 599 return field->default_value_enum()->number() == 0; 600 case FieldDescriptor::CPPTYPE_STRING: 601 case FieldDescriptor::CPPTYPE_MESSAGE: 602 return false; 603 604 // No default because we want the compiler to complain if any new 605 // types are added. 606 } 607 608 GOOGLE_LOG(FATAL) << "Can't get here."; 609 return false; 610 } 611 IsByteStringWithCustomDefaultValue(const FieldDescriptor * field)612 bool IsByteStringWithCustomDefaultValue(const FieldDescriptor* field) { 613 return GetJavaType(field) == JAVATYPE_BYTES && 614 field->default_value_string() != ""; 615 } 616 617 const char* bit_masks[] = { 618 "0x00000001", "0x00000002", "0x00000004", "0x00000008", 619 "0x00000010", "0x00000020", "0x00000040", "0x00000080", 620 621 "0x00000100", "0x00000200", "0x00000400", "0x00000800", 622 "0x00001000", "0x00002000", "0x00004000", "0x00008000", 623 624 "0x00010000", "0x00020000", "0x00040000", "0x00080000", 625 "0x00100000", "0x00200000", "0x00400000", "0x00800000", 626 627 "0x01000000", "0x02000000", "0x04000000", "0x08000000", 628 "0x10000000", "0x20000000", "0x40000000", "0x80000000", 629 }; 630 GetBitFieldName(int index)631 std::string GetBitFieldName(int index) { 632 std::string varName = "bitField"; 633 varName += StrCat(index); 634 varName += "_"; 635 return varName; 636 } 637 GetBitFieldNameForBit(int bitIndex)638 std::string GetBitFieldNameForBit(int bitIndex) { 639 return GetBitFieldName(bitIndex / 32); 640 } 641 642 namespace { 643 GenerateGetBitInternal(const std::string & prefix,int bitIndex)644 std::string GenerateGetBitInternal(const std::string& prefix, int bitIndex) { 645 std::string varName = prefix + GetBitFieldNameForBit(bitIndex); 646 int bitInVarIndex = bitIndex % 32; 647 648 std::string mask = bit_masks[bitInVarIndex]; 649 std::string result = "((" + varName + " & " + mask + ") != 0)"; 650 return result; 651 } 652 GenerateSetBitInternal(const std::string & prefix,int bitIndex)653 std::string GenerateSetBitInternal(const std::string& prefix, int bitIndex) { 654 std::string varName = prefix + GetBitFieldNameForBit(bitIndex); 655 int bitInVarIndex = bitIndex % 32; 656 657 std::string mask = bit_masks[bitInVarIndex]; 658 std::string result = varName + " |= " + mask; 659 return result; 660 } 661 662 } // namespace 663 GenerateGetBit(int bitIndex)664 std::string GenerateGetBit(int bitIndex) { 665 return GenerateGetBitInternal("", bitIndex); 666 } 667 GenerateSetBit(int bitIndex)668 std::string GenerateSetBit(int bitIndex) { 669 return GenerateSetBitInternal("", bitIndex); 670 } 671 GenerateClearBit(int bitIndex)672 std::string GenerateClearBit(int bitIndex) { 673 std::string varName = GetBitFieldNameForBit(bitIndex); 674 int bitInVarIndex = bitIndex % 32; 675 676 std::string mask = bit_masks[bitInVarIndex]; 677 std::string result = varName + " = (" + varName + " & ~" + mask + ")"; 678 return result; 679 } 680 GenerateGetBitFromLocal(int bitIndex)681 std::string GenerateGetBitFromLocal(int bitIndex) { 682 return GenerateGetBitInternal("from_", bitIndex); 683 } 684 GenerateSetBitToLocal(int bitIndex)685 std::string GenerateSetBitToLocal(int bitIndex) { 686 return GenerateSetBitInternal("to_", bitIndex); 687 } 688 GenerateGetBitMutableLocal(int bitIndex)689 std::string GenerateGetBitMutableLocal(int bitIndex) { 690 return GenerateGetBitInternal("mutable_", bitIndex); 691 } 692 GenerateSetBitMutableLocal(int bitIndex)693 std::string GenerateSetBitMutableLocal(int bitIndex) { 694 return GenerateSetBitInternal("mutable_", bitIndex); 695 } 696 IsReferenceType(JavaType type)697 bool IsReferenceType(JavaType type) { 698 switch (type) { 699 case JAVATYPE_INT: 700 return false; 701 case JAVATYPE_LONG: 702 return false; 703 case JAVATYPE_FLOAT: 704 return false; 705 case JAVATYPE_DOUBLE: 706 return false; 707 case JAVATYPE_BOOLEAN: 708 return false; 709 case JAVATYPE_STRING: 710 return true; 711 case JAVATYPE_BYTES: 712 return true; 713 case JAVATYPE_ENUM: 714 return true; 715 case JAVATYPE_MESSAGE: 716 return true; 717 718 // No default because we want the compiler to complain if any new 719 // JavaTypes are added. 720 } 721 722 GOOGLE_LOG(FATAL) << "Can't get here."; 723 return false; 724 } 725 GetCapitalizedType(const FieldDescriptor * field,bool immutable)726 const char* GetCapitalizedType(const FieldDescriptor* field, bool immutable) { 727 switch (GetType(field)) { 728 case FieldDescriptor::TYPE_INT32: 729 return "Int32"; 730 case FieldDescriptor::TYPE_UINT32: 731 return "UInt32"; 732 case FieldDescriptor::TYPE_SINT32: 733 return "SInt32"; 734 case FieldDescriptor::TYPE_FIXED32: 735 return "Fixed32"; 736 case FieldDescriptor::TYPE_SFIXED32: 737 return "SFixed32"; 738 case FieldDescriptor::TYPE_INT64: 739 return "Int64"; 740 case FieldDescriptor::TYPE_UINT64: 741 return "UInt64"; 742 case FieldDescriptor::TYPE_SINT64: 743 return "SInt64"; 744 case FieldDescriptor::TYPE_FIXED64: 745 return "Fixed64"; 746 case FieldDescriptor::TYPE_SFIXED64: 747 return "SFixed64"; 748 case FieldDescriptor::TYPE_FLOAT: 749 return "Float"; 750 case FieldDescriptor::TYPE_DOUBLE: 751 return "Double"; 752 case FieldDescriptor::TYPE_BOOL: 753 return "Bool"; 754 case FieldDescriptor::TYPE_STRING: 755 return "String"; 756 case FieldDescriptor::TYPE_BYTES: { 757 return "Bytes"; 758 } 759 case FieldDescriptor::TYPE_ENUM: 760 return "Enum"; 761 case FieldDescriptor::TYPE_GROUP: 762 return "Group"; 763 case FieldDescriptor::TYPE_MESSAGE: 764 return "Message"; 765 766 // No default because we want the compiler to complain if any new 767 // types are added. 768 } 769 770 GOOGLE_LOG(FATAL) << "Can't get here."; 771 return NULL; 772 } 773 774 // For encodings with fixed sizes, returns that size in bytes. Otherwise 775 // returns -1. FixedSize(FieldDescriptor::Type type)776 int FixedSize(FieldDescriptor::Type type) { 777 switch (type) { 778 case FieldDescriptor::TYPE_INT32: 779 return -1; 780 case FieldDescriptor::TYPE_INT64: 781 return -1; 782 case FieldDescriptor::TYPE_UINT32: 783 return -1; 784 case FieldDescriptor::TYPE_UINT64: 785 return -1; 786 case FieldDescriptor::TYPE_SINT32: 787 return -1; 788 case FieldDescriptor::TYPE_SINT64: 789 return -1; 790 case FieldDescriptor::TYPE_FIXED32: 791 return WireFormatLite::kFixed32Size; 792 case FieldDescriptor::TYPE_FIXED64: 793 return WireFormatLite::kFixed64Size; 794 case FieldDescriptor::TYPE_SFIXED32: 795 return WireFormatLite::kSFixed32Size; 796 case FieldDescriptor::TYPE_SFIXED64: 797 return WireFormatLite::kSFixed64Size; 798 case FieldDescriptor::TYPE_FLOAT: 799 return WireFormatLite::kFloatSize; 800 case FieldDescriptor::TYPE_DOUBLE: 801 return WireFormatLite::kDoubleSize; 802 803 case FieldDescriptor::TYPE_BOOL: 804 return WireFormatLite::kBoolSize; 805 case FieldDescriptor::TYPE_ENUM: 806 return -1; 807 808 case FieldDescriptor::TYPE_STRING: 809 return -1; 810 case FieldDescriptor::TYPE_BYTES: 811 return -1; 812 case FieldDescriptor::TYPE_GROUP: 813 return -1; 814 case FieldDescriptor::TYPE_MESSAGE: 815 return -1; 816 817 // No default because we want the compiler to complain if any new 818 // types are added. 819 } 820 GOOGLE_LOG(FATAL) << "Can't get here."; 821 return -1; 822 } 823 824 // Sort the fields of the given Descriptor by number into a new[]'d array 825 // and return it. The caller should delete the returned array. SortFieldsByNumber(const Descriptor * descriptor)826 const FieldDescriptor** SortFieldsByNumber(const Descriptor* descriptor) { 827 const FieldDescriptor** fields = 828 new const FieldDescriptor*[descriptor->field_count()]; 829 for (int i = 0; i < descriptor->field_count(); i++) { 830 fields[i] = descriptor->field(i); 831 } 832 std::sort(fields, fields + descriptor->field_count(), 833 FieldOrderingByNumber()); 834 return fields; 835 } 836 837 // Returns true if the message type has any required fields. If it doesn't, 838 // we can optimize out calls to its isInitialized() method. 839 // 840 // already_seen is used to avoid checking the same type multiple times 841 // (and also to protect against recursion). HasRequiredFields(const Descriptor * type,std::unordered_set<const Descriptor * > * already_seen)842 bool HasRequiredFields(const Descriptor* type, 843 std::unordered_set<const Descriptor*>* already_seen) { 844 if (already_seen->count(type) > 0) { 845 // The type is already in cache. This means that either: 846 // a. The type has no required fields. 847 // b. We are in the midst of checking if the type has required fields, 848 // somewhere up the stack. In this case, we know that if the type 849 // has any required fields, they'll be found when we return to it, 850 // and the whole call to HasRequiredFields() will return true. 851 // Therefore, we don't have to check if this type has required fields 852 // here. 853 return false; 854 } 855 already_seen->insert(type); 856 857 // If the type has extensions, an extension with message type could contain 858 // required fields, so we have to be conservative and assume such an 859 // extension exists. 860 if (type->extension_range_count() > 0) return true; 861 862 for (int i = 0; i < type->field_count(); i++) { 863 const FieldDescriptor* field = type->field(i); 864 if (field->is_required()) { 865 return true; 866 } 867 if (GetJavaType(field) == JAVATYPE_MESSAGE) { 868 if (HasRequiredFields(field->message_type(), already_seen)) { 869 return true; 870 } 871 } 872 } 873 874 return false; 875 } 876 HasRequiredFields(const Descriptor * type)877 bool HasRequiredFields(const Descriptor* type) { 878 std::unordered_set<const Descriptor*> already_seen; 879 return HasRequiredFields(type, &already_seen); 880 } 881 HasRepeatedFields(const Descriptor * descriptor)882 bool HasRepeatedFields(const Descriptor* descriptor) { 883 for (int i = 0; i < descriptor->field_count(); ++i) { 884 const FieldDescriptor* field = descriptor->field(i); 885 if (field->is_repeated()) { 886 return true; 887 } 888 } 889 return false; 890 } 891 892 // Encode an unsigned 32-bit value into a sequence of UTF-16 characters. 893 // 894 // If the value is in [0x0000, 0xD7FF], we encode it with a single character 895 // with the same numeric value. 896 // 897 // If the value is larger than 0xD7FF, we encode its lowest 13 bits into a 898 // character in the range [0xE000, 0xFFFF] by combining these 13 bits with 899 // 0xE000 using logic-or. Then we shift the value to the right by 13 bits, and 900 // encode the remaining value by repeating this same process until we get to 901 // a value in [0x0000, 0xD7FF] where we will encode it using a character with 902 // the same numeric value. 903 // 904 // Note that we only use code points in [0x0000, 0xD7FF] and [0xE000, 0xFFFF]. 905 // There will be no surrogate pairs in the encoded character sequence. WriteUInt32ToUtf16CharSequence(uint32 number,std::vector<uint16> * output)906 void WriteUInt32ToUtf16CharSequence(uint32 number, 907 std::vector<uint16>* output) { 908 // For values in [0x0000, 0xD7FF], only use one char to encode it. 909 if (number < 0xD800) { 910 output->push_back(static_cast<uint16>(number)); 911 return; 912 } 913 // Encode into multiple chars. All except the last char will be in the range 914 // [0xE000, 0xFFFF], and the last char will be in the range [0x0000, 0xD7FF]. 915 // Note that we don't use any value in range [0xD800, 0xDFFF] because they 916 // have to come in pairs and the encoding is just more space-efficient w/o 917 // them. 918 while (number >= 0xD800) { 919 // [0xE000, 0xFFFF] can represent 13 bits of info. 920 output->push_back(static_cast<uint16>(0xE000 | (number & 0x1FFF))); 921 number >>= 13; 922 } 923 output->push_back(static_cast<uint16>(number)); 924 } 925 GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor * field)926 int GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor* field) { 927 // j/c/g/protobuf/FieldType.java lists field types in a slightly different 928 // order from FieldDescriptor::Type so we can't do a simple cast. 929 // 930 // TODO(xiaofeng): Make j/c/g/protobuf/FieldType.java follow the same order. 931 int result = field->type(); 932 if (result == FieldDescriptor::TYPE_GROUP) { 933 return 17; 934 } else if (result < FieldDescriptor::TYPE_GROUP) { 935 return result - 1; 936 } else { 937 return result - 2; 938 } 939 } 940 GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor * field)941 int GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor* field) { 942 if (field->type() == FieldDescriptor::TYPE_GROUP) { 943 return 49; 944 } else { 945 return GetExperimentalJavaFieldTypeForSingular(field) + 18; 946 } 947 } 948 GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor * field)949 int GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor* field) { 950 int result = field->type(); 951 if (result < FieldDescriptor::TYPE_STRING) { 952 return result + 34; 953 } else if (result > FieldDescriptor::TYPE_BYTES) { 954 return result + 30; 955 } else { 956 GOOGLE_LOG(FATAL) << field->full_name() << " can't be packed."; 957 return 0; 958 } 959 } 960 GetExperimentalJavaFieldType(const FieldDescriptor * field)961 int GetExperimentalJavaFieldType(const FieldDescriptor* field) { 962 static const int kMapFieldType = 50; 963 static const int kOneofFieldTypeOffset = 51; 964 static const int kRequiredBit = 0x100; 965 static const int kUtf8CheckBit = 0x200; 966 static const int kCheckInitialized = 0x400; 967 static const int kMapWithProto2EnumValue = 0x800; 968 int extra_bits = field->is_required() ? kRequiredBit : 0; 969 if (field->type() == FieldDescriptor::TYPE_STRING && CheckUtf8(field)) { 970 extra_bits |= kUtf8CheckBit; 971 } 972 if (field->is_required() || (GetJavaType(field) == JAVATYPE_MESSAGE && 973 HasRequiredFields(field->message_type()))) { 974 extra_bits |= kCheckInitialized; 975 } 976 977 if (field->is_map()) { 978 if (SupportFieldPresence(field->file())) { 979 const FieldDescriptor* value = 980 field->message_type()->FindFieldByName("value"); 981 if (GetJavaType(value) == JAVATYPE_ENUM) { 982 extra_bits |= kMapWithProto2EnumValue; 983 } 984 } 985 return kMapFieldType | extra_bits; 986 } else if (field->is_packed()) { 987 return GetExperimentalJavaFieldTypeForPacked(field); 988 } else if (field->is_repeated()) { 989 return GetExperimentalJavaFieldTypeForRepeated(field) | extra_bits; 990 } else if (field->containing_oneof() != NULL) { 991 return (GetExperimentalJavaFieldTypeForSingular(field) + 992 kOneofFieldTypeOffset) | 993 extra_bits; 994 } else { 995 return GetExperimentalJavaFieldTypeForSingular(field) | extra_bits; 996 } 997 } 998 999 // Escape a UTF-16 character to be embedded in a Java string. EscapeUtf16ToString(uint16 code,std::string * output)1000 void EscapeUtf16ToString(uint16 code, std::string* output) { 1001 if (code == '\t') { 1002 output->append("\\t"); 1003 } else if (code == '\b') { 1004 output->append("\\b"); 1005 } else if (code == '\n') { 1006 output->append("\\n"); 1007 } else if (code == '\r') { 1008 output->append("\\r"); 1009 } else if (code == '\f') { 1010 output->append("\\f"); 1011 } else if (code == '\'') { 1012 output->append("\\'"); 1013 } else if (code == '\"') { 1014 output->append("\\\""); 1015 } else if (code == '\\') { 1016 output->append("\\\\"); 1017 } else if (code >= 0x20 && code <= 0x7f) { 1018 output->push_back(static_cast<char>(code)); 1019 } else { 1020 output->append(StringPrintf("\\u%04x", code)); 1021 } 1022 } 1023 1024 } // namespace java 1025 } // namespace compiler 1026 } // namespace protobuf 1027 } // namespace google 1028