1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file or at 6 // https://developers.google.com/open-source/licenses/bsd 7 8 // Author: kenton@google.com (Kenton Varda) 9 // Based on original Protocol Buffers design by 10 // Sanjay Ghemawat, Jeff Dean, and others. 11 // 12 // Interface for manipulating databases of descriptors. 13 14 #ifndef GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__ 15 #define GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__ 16 17 #include <string> 18 #include <utility> 19 #include <vector> 20 21 #include "absl/container/btree_map.h" 22 #include "google/protobuf/descriptor.h" 23 #include "google/protobuf/port.h" 24 25 // Must be included last. 26 #include "google/protobuf/port_def.inc" 27 28 #ifdef SWIG 29 #error "You cannot SWIG proto headers" 30 #endif 31 32 namespace google { 33 namespace protobuf { 34 35 // Defined in this file. 36 class DescriptorDatabase; 37 class SimpleDescriptorDatabase; 38 class EncodedDescriptorDatabase; 39 class DescriptorPoolDatabase; 40 class MergedDescriptorDatabase; 41 42 // Abstract interface for a database of descriptors. 43 // 44 // This is useful if you want to create a DescriptorPool which loads 45 // descriptors on-demand from some sort of large database. If the database 46 // is large, it may be inefficient to enumerate every .proto file inside it 47 // calling DescriptorPool::BuildFile() for each one. Instead, a DescriptorPool 48 // can be created which wraps a DescriptorDatabase and only builds particular 49 // descriptors when they are needed. 50 class PROTOBUF_EXPORT DescriptorDatabase { 51 public: DescriptorDatabase()52 inline DescriptorDatabase() {} 53 DescriptorDatabase(const DescriptorDatabase&) = delete; 54 DescriptorDatabase& operator=(const DescriptorDatabase&) = delete; 55 virtual ~DescriptorDatabase(); 56 57 // Find a file by file name. Fills in in *output and returns true if found. 58 // Otherwise, returns false, leaving the contents of *output undefined. 59 virtual bool FindFileByName(const std::string& filename, 60 FileDescriptorProto* output) = 0; 61 62 // Find the file that declares the given fully-qualified symbol name. 63 // If found, fills in *output and returns true, otherwise returns false 64 // and leaves *output undefined. 65 virtual bool FindFileContainingSymbol(const std::string& symbol_name, 66 FileDescriptorProto* output) = 0; 67 68 // Find the file which defines an extension extending the given message type 69 // with the given field number. If found, fills in *output and returns true, 70 // otherwise returns false and leaves *output undefined. containing_type 71 // must be a fully-qualified type name. 72 virtual bool FindFileContainingExtension(const std::string& containing_type, 73 int field_number, 74 FileDescriptorProto* output) = 0; 75 76 // Finds the tag numbers used by all known extensions of 77 // extendee_type, and appends them to output in an undefined 78 // order. This method is best-effort: it's not guaranteed that the 79 // database will find all extensions, and it's not guaranteed that 80 // FindFileContainingExtension will return true on all of the found 81 // numbers. Returns true if the search was successful, otherwise 82 // returns false and leaves output unchanged. 83 // 84 // This method has a default implementation that always returns 85 // false. FindAllExtensionNumbers(const std::string &,std::vector<int> *)86 virtual bool FindAllExtensionNumbers(const std::string& /* extendee_type */, 87 std::vector<int>* /* output */) { 88 return false; 89 } 90 91 92 // Finds the file names and appends them to the output in an 93 // undefined order. This method is best-effort: it's not guaranteed that the 94 // database will find all files. Returns true if the database supports 95 // searching all file names, otherwise returns false and leaves output 96 // unchanged. 97 // 98 // This method has a default implementation that always returns 99 // false. FindAllFileNames(std::vector<std::string> *)100 virtual bool FindAllFileNames(std::vector<std::string>* /*output*/) { 101 return false; 102 } 103 104 // Finds the package names and appends them to the output in an 105 // undefined order. This method is best-effort: it's not guaranteed that the 106 // database will find all packages. Returns true if the database supports 107 // searching all package names, otherwise returns false and leaves output 108 // unchanged. 109 bool FindAllPackageNames(std::vector<std::string>* output); 110 111 // Finds the message names and appends them to the output in an 112 // undefined order. This method is best-effort: it's not guaranteed that the 113 // database will find all messages. Returns true if the database supports 114 // searching all message names, otherwise returns false and leaves output 115 // unchanged. 116 bool FindAllMessageNames(std::vector<std::string>* output); 117 }; 118 119 // A DescriptorDatabase into which you can insert files manually. 120 // 121 // FindFileContainingSymbol() is fully-implemented. When you add a file, its 122 // symbols will be indexed for this purpose. Note that the implementation 123 // may return false positives, but only if it isn't possible for the symbol 124 // to be defined in any other file. In particular, if a file defines a symbol 125 // "Foo", then searching for "Foo.[anything]" will match that file. This way, 126 // the database does not need to aggressively index all children of a symbol. 127 // 128 // FindFileContainingExtension() is mostly-implemented. It works if and only 129 // if the original FieldDescriptorProto defining the extension has a 130 // fully-qualified type name in its "extendee" field (i.e. starts with a '.'). 131 // If the extendee is a relative name, SimpleDescriptorDatabase will not 132 // attempt to resolve the type, so it will not know what type the extension is 133 // extending. Therefore, calling FindFileContainingExtension() with the 134 // extension's containing type will never actually find that extension. Note 135 // that this is an unlikely problem, as all FileDescriptorProtos created by the 136 // protocol compiler (as well as ones created by calling 137 // FileDescriptor::CopyTo()) will always use fully-qualified names for all 138 // types. You only need to worry if you are constructing FileDescriptorProtos 139 // yourself, or are calling compiler::Parser directly. 140 class PROTOBUF_EXPORT SimpleDescriptorDatabase : public DescriptorDatabase { 141 public: 142 SimpleDescriptorDatabase(); 143 SimpleDescriptorDatabase(const SimpleDescriptorDatabase&) = delete; 144 SimpleDescriptorDatabase& operator=(const SimpleDescriptorDatabase&) = delete; 145 ~SimpleDescriptorDatabase() override; 146 147 // Adds the FileDescriptorProto to the database, making a copy. The object 148 // can be deleted after Add() returns. Returns false if the file conflicted 149 // with a file already in the database, in which case an error will have 150 // been written to ABSL_LOG(ERROR). 151 bool Add(const FileDescriptorProto& file); 152 153 // Adds the FileDescriptorProto to the database and takes ownership of it. 154 bool AddAndOwn(const FileDescriptorProto* file); 155 156 // Adds the FileDescriptorProto to the database and not take ownership of it. 157 // The owner must ensure file outlives the SimpleDescriptorDatabase. 158 bool AddUnowned(const FileDescriptorProto* file); 159 160 // implements DescriptorDatabase ----------------------------------- 161 bool FindFileByName(const std::string& filename, 162 FileDescriptorProto* output) override; 163 bool FindFileContainingSymbol(const std::string& symbol_name, 164 FileDescriptorProto* output) override; 165 bool FindFileContainingExtension(const std::string& containing_type, 166 int field_number, 167 FileDescriptorProto* output) override; 168 bool FindAllExtensionNumbers(const std::string& extendee_type, 169 std::vector<int>* output) override; 170 171 bool FindAllFileNames(std::vector<std::string>* output) override; 172 173 private: 174 // An index mapping file names, symbol names, and extension numbers to 175 // some sort of values. 176 template <typename Value> 177 class DescriptorIndex { 178 public: 179 // Helpers to recursively add particular descriptors and all their contents 180 // to the index. 181 bool AddFile(const FileDescriptorProto& file, Value value); 182 bool AddSymbol(absl::string_view name, Value value); 183 bool AddNestedExtensions(const std::string& filename, 184 const DescriptorProto& message_type, Value value); 185 bool AddExtension(const std::string& filename, 186 const FieldDescriptorProto& field, Value value); 187 188 Value FindFile(const std::string& filename); 189 Value FindSymbol(const std::string& name); 190 Value FindExtension(const std::string& containing_type, int field_number); 191 bool FindAllExtensionNumbers(const std::string& containing_type, 192 std::vector<int>* output); 193 void FindAllFileNames(std::vector<std::string>* output); 194 195 private: 196 absl::btree_map<std::string, Value> by_name_; 197 absl::btree_map<std::string, Value> by_symbol_; 198 absl::btree_map<std::pair<std::string, int>, Value> by_extension_; 199 200 // Invariant: The by_symbol_ map does not contain any symbols which are 201 // prefixes of other symbols in the map. For example, "foo.bar" is a 202 // prefix of "foo.bar.baz" (but is not a prefix of "foo.barbaz"). 203 // 204 // This invariant is important because it means that given a symbol name, 205 // we can find a key in the map which is a prefix of the symbol in O(lg n) 206 // time, and we know that there is at most one such key. 207 // 208 // The prefix lookup algorithm works like so: 209 // 1) Find the last key in the map which is less than or equal to the 210 // search key. 211 // 2) If the found key is a prefix of the search key, then return it. 212 // Otherwise, there is no match. 213 // 214 // I am sure this algorithm has been described elsewhere, but since I 215 // wasn't able to find it quickly I will instead prove that it works 216 // myself. The key to the algorithm is that if a match exists, step (1) 217 // will find it. Proof: 218 // 1) Define the "search key" to be the key we are looking for, the "found 219 // key" to be the key found in step (1), and the "match key" to be the 220 // key which actually matches the search key (i.e. the key we're trying 221 // to find). 222 // 2) The found key must be less than or equal to the search key by 223 // definition. 224 // 3) The match key must also be less than or equal to the search key 225 // (because it is a prefix). 226 // 4) The match key cannot be greater than the found key, because if it 227 // were, then step (1) of the algorithm would have returned the match 228 // key instead (since it finds the *greatest* key which is less than or 229 // equal to the search key). 230 // 5) Therefore, the found key must be between the match key and the search 231 // key, inclusive. 232 // 6) Since the search key must be a sub-symbol of the match key, if it is 233 // not equal to the match key, then search_key[match_key.size()] must 234 // be '.'. 235 // 7) Since '.' sorts before any other character that is valid in a symbol 236 // name, then if the found key is not equal to the match key, then 237 // found_key[match_key.size()] must also be '.', because any other value 238 // would make it sort after the search key. 239 // 8) Therefore, if the found key is not equal to the match key, then the 240 // found key must be a sub-symbol of the match key. However, this would 241 // contradict our map invariant which says that no symbol in the map is 242 // a sub-symbol of any other. 243 // 9) Therefore, the found key must match the match key. 244 // 245 // The above proof assumes the match key exists. In the case that the 246 // match key does not exist, then step (1) will return some other symbol. 247 // That symbol cannot be a super-symbol of the search key since if it were, 248 // then it would be a match, and we're assuming the match key doesn't exist. 249 // Therefore, step 2 will correctly return no match. 250 }; 251 252 DescriptorIndex<const FileDescriptorProto*> index_; 253 std::vector<std::unique_ptr<const FileDescriptorProto>> files_to_delete_; 254 255 // If file is non-nullptr, copy it into *output and return true, otherwise 256 // return false. 257 bool MaybeCopy(const FileDescriptorProto* file, FileDescriptorProto* output); 258 }; 259 260 // Very similar to SimpleDescriptorDatabase, but stores all the descriptors 261 // as raw bytes and generally tries to use as little memory as possible. 262 // 263 // The same caveats regarding FindFileContainingExtension() apply as with 264 // SimpleDescriptorDatabase. 265 class PROTOBUF_EXPORT EncodedDescriptorDatabase : public DescriptorDatabase { 266 public: 267 EncodedDescriptorDatabase(); 268 EncodedDescriptorDatabase(const EncodedDescriptorDatabase&) = delete; 269 EncodedDescriptorDatabase& operator=(const EncodedDescriptorDatabase&) = 270 delete; 271 ~EncodedDescriptorDatabase() override; 272 273 // Adds the FileDescriptorProto to the database. The descriptor is provided 274 // in encoded form. The database does not make a copy of the bytes, nor 275 // does it take ownership; it's up to the caller to make sure the bytes 276 // remain valid for the life of the database. Returns false and logs an error 277 // if the bytes are not a valid FileDescriptorProto or if the file conflicted 278 // with a file already in the database. 279 bool Add(const void* encoded_file_descriptor, int size); 280 281 // Like Add(), but makes a copy of the data, so that the caller does not 282 // need to keep it around. 283 bool AddCopy(const void* encoded_file_descriptor, int size); 284 285 // Like FindFileContainingSymbol but returns only the name of the file. 286 bool FindNameOfFileContainingSymbol(const std::string& symbol_name, 287 std::string* output); 288 289 // implements DescriptorDatabase ----------------------------------- 290 bool FindFileByName(const std::string& filename, 291 FileDescriptorProto* output) override; 292 bool FindFileContainingSymbol(const std::string& symbol_name, 293 FileDescriptorProto* output) override; 294 bool FindFileContainingExtension(const std::string& containing_type, 295 int field_number, 296 FileDescriptorProto* output) override; 297 bool FindAllExtensionNumbers(const std::string& extendee_type, 298 std::vector<int>* output) override; 299 bool FindAllFileNames(std::vector<std::string>* output) override; 300 301 private: 302 class DescriptorIndex; 303 // Keep DescriptorIndex by pointer to hide the implementation to keep a 304 // cleaner header. 305 std::unique_ptr<DescriptorIndex> index_; 306 std::vector<void*> files_to_delete_; 307 308 // If encoded_file.first is non-nullptr, parse the data into *output and 309 // return true, otherwise return false. 310 bool MaybeParse(std::pair<const void*, int> encoded_file, 311 FileDescriptorProto* output); 312 }; 313 314 struct PROTOBUF_EXPORT DescriptorPoolDatabaseOptions { 315 // If true, the database will preserve source code info when returning 316 // descriptors. 317 bool preserve_source_code_info = false; 318 }; 319 320 // A DescriptorDatabase that fetches files from a given pool. 321 class PROTOBUF_EXPORT DescriptorPoolDatabase : public DescriptorDatabase { 322 public: 323 explicit DescriptorPoolDatabase(const DescriptorPool& pool, 324 DescriptorPoolDatabaseOptions options = {}); 325 DescriptorPoolDatabase(const DescriptorPoolDatabase&) = delete; 326 DescriptorPoolDatabase& operator=(const DescriptorPoolDatabase&) = delete; 327 ~DescriptorPoolDatabase() override; 328 329 // implements DescriptorDatabase ----------------------------------- 330 bool FindFileByName(const std::string& filename, 331 FileDescriptorProto* output) override; 332 bool FindFileContainingSymbol(const std::string& symbol_name, 333 FileDescriptorProto* output) override; 334 bool FindFileContainingExtension(const std::string& containing_type, 335 int field_number, 336 FileDescriptorProto* output) override; 337 bool FindAllExtensionNumbers(const std::string& extendee_type, 338 std::vector<int>* output) override; 339 340 private: 341 const DescriptorPool& pool_; 342 DescriptorPoolDatabaseOptions options_; 343 }; 344 345 // A DescriptorDatabase that wraps two or more others. It first searches the 346 // first database and, if that fails, tries the second, and so on. 347 class PROTOBUF_EXPORT MergedDescriptorDatabase : public DescriptorDatabase { 348 public: 349 // Merge just two databases. The sources remain property of the caller. 350 MergedDescriptorDatabase(DescriptorDatabase* source1, 351 DescriptorDatabase* source2); 352 // Merge more than two databases. The sources remain property of the caller. 353 // The vector may be deleted after the constructor returns but the 354 // DescriptorDatabases need to stick around. 355 explicit MergedDescriptorDatabase( 356 const std::vector<DescriptorDatabase*>& sources); 357 MergedDescriptorDatabase(const MergedDescriptorDatabase&) = delete; 358 MergedDescriptorDatabase& operator=(const MergedDescriptorDatabase&) = delete; 359 ~MergedDescriptorDatabase() override; 360 361 // implements DescriptorDatabase ----------------------------------- 362 bool FindFileByName(const std::string& filename, 363 FileDescriptorProto* output) override; 364 bool FindFileContainingSymbol(const std::string& symbol_name, 365 FileDescriptorProto* output) override; 366 bool FindFileContainingExtension(const std::string& containing_type, 367 int field_number, 368 FileDescriptorProto* output) override; 369 // Merges the results of calling all databases. Returns true iff any 370 // of the databases returned true. 371 bool FindAllExtensionNumbers(const std::string& extendee_type, 372 std::vector<int>* output) override; 373 374 375 // This function is best-effort. Returns true if at least one underlying 376 // DescriptorDatabase returns true. 377 bool FindAllFileNames(std::vector<std::string>* output) override; 378 379 private: 380 std::vector<DescriptorDatabase*> sources_; 381 }; 382 383 } // namespace protobuf 384 } // namespace google 385 386 #include "google/protobuf/port_undef.inc" 387 388 #endif // GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__ 389