1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and 11 /// clang::Selector interfaces. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 17 18 #include "clang/Basic/LLVM.h" 19 #include "clang/Basic/TokenKinds.h" 20 #include "llvm/ADT/DenseMapInfo.h" 21 #include "llvm/ADT/SmallString.h" 22 #include "llvm/ADT/StringMap.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/Allocator.h" 25 #include "llvm/Support/PointerLikeTypeTraits.h" 26 #include "llvm/Support/type_traits.h" 27 #include <cassert> 28 #include <cstddef> 29 #include <cstdint> 30 #include <cstring> 31 #include <string> 32 #include <utility> 33 34 namespace clang { 35 36 class DeclarationName; 37 class DeclarationNameTable; 38 class IdentifierInfo; 39 class LangOptions; 40 class MultiKeywordSelector; 41 class SourceLocation; 42 43 /// A simple pair of identifier info and location. 44 using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>; 45 46 /// IdentifierInfo and other related classes are aligned to 47 /// 8 bytes so that DeclarationName can use the lower 3 bits 48 /// of a pointer to one of these classes. 49 enum { IdentifierInfoAlignment = 8 }; 50 51 static constexpr int ObjCOrBuiltinIDBits = 15; 52 53 /// One of these records is kept for each identifier that 54 /// is lexed. This contains information about whether the token was \#define'd, 55 /// is a language keyword, or if it is a front-end token of some sort (e.g. a 56 /// variable or function name). The preprocessor keeps this information in a 57 /// set, and all tok::identifier tokens have a pointer to one of these. 58 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits. 59 class alignas(IdentifierInfoAlignment) IdentifierInfo { 60 friend class IdentifierTable; 61 62 // Front-end token ID or tok::identifier. 63 unsigned TokenID : 9; 64 65 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf). 66 // First NUM_OBJC_KEYWORDS values are for Objective-C, 67 // the remaining values are for builtins. 68 unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits; 69 70 // True if there is a #define for this. 71 unsigned HasMacro : 1; 72 73 // True if there was a #define for this. 74 unsigned HadMacro : 1; 75 76 // True if the identifier is a language extension. 77 unsigned IsExtension : 1; 78 79 // True if the identifier is a keyword in a newer or proposed Standard. 80 unsigned IsFutureCompatKeyword : 1; 81 82 // True if the identifier is poisoned. 83 unsigned IsPoisoned : 1; 84 85 // True if the identifier is a C++ operator keyword. 86 unsigned IsCPPOperatorKeyword : 1; 87 88 // Internal bit set by the member function RecomputeNeedsHandleIdentifier. 89 // See comment about RecomputeNeedsHandleIdentifier for more info. 90 unsigned NeedsHandleIdentifier : 1; 91 92 // True if the identifier was loaded (at least partially) from an AST file. 93 unsigned IsFromAST : 1; 94 95 // True if the identifier has changed from the definition 96 // loaded from an AST file. 97 unsigned ChangedAfterLoad : 1; 98 99 // True if the identifier's frontend information has changed from the 100 // definition loaded from an AST file. 101 unsigned FEChangedAfterLoad : 1; 102 103 // True if revertTokenIDToIdentifier was called. 104 unsigned RevertedTokenID : 1; 105 106 // True if there may be additional information about 107 // this identifier stored externally. 108 unsigned OutOfDate : 1; 109 110 // True if this is the 'import' contextual keyword. 111 unsigned IsModulesImport : 1; 112 113 // True if this is a mangled OpenMP variant name. 114 unsigned IsMangledOpenMPVariantName : 1; 115 116 // 28 bits left in a 64-bit word. 117 118 // Managed by the language front-end. 119 void *FETokenInfo = nullptr; 120 121 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr; 122 IdentifierInfo()123 IdentifierInfo() 124 : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false), 125 HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false), 126 IsPoisoned(false), IsCPPOperatorKeyword(false), 127 NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), 128 FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), 129 IsModulesImport(false), IsMangledOpenMPVariantName(false) {} 130 131 public: 132 IdentifierInfo(const IdentifierInfo &) = delete; 133 IdentifierInfo &operator=(const IdentifierInfo &) = delete; 134 IdentifierInfo(IdentifierInfo &&) = delete; 135 IdentifierInfo &operator=(IdentifierInfo &&) = delete; 136 137 /// Return true if this is the identifier for the specified string. 138 /// 139 /// This is intended to be used for string literals only: II->isStr("foo"). 140 template <std::size_t StrLen> isStr(const char (& Str)[StrLen])141 bool isStr(const char (&Str)[StrLen]) const { 142 return getLength() == StrLen-1 && 143 memcmp(getNameStart(), Str, StrLen-1) == 0; 144 } 145 146 /// Return true if this is the identifier for the specified StringRef. isStr(llvm::StringRef Str)147 bool isStr(llvm::StringRef Str) const { 148 llvm::StringRef ThisStr(getNameStart(), getLength()); 149 return ThisStr == Str; 150 } 151 152 /// Return the beginning of the actual null-terminated string for this 153 /// identifier. getNameStart()154 const char *getNameStart() const { return Entry->getKeyData(); } 155 156 /// Efficiently return the length of this identifier info. getLength()157 unsigned getLength() const { return Entry->getKeyLength(); } 158 159 /// Return the actual identifier string. getName()160 StringRef getName() const { 161 return StringRef(getNameStart(), getLength()); 162 } 163 164 /// Return true if this identifier is \#defined to some other value. 165 /// \note The current definition may be in a module and not currently visible. hasMacroDefinition()166 bool hasMacroDefinition() const { 167 return HasMacro; 168 } setHasMacroDefinition(bool Val)169 void setHasMacroDefinition(bool Val) { 170 if (HasMacro == Val) return; 171 172 HasMacro = Val; 173 if (Val) { 174 NeedsHandleIdentifier = true; 175 HadMacro = true; 176 } else { 177 RecomputeNeedsHandleIdentifier(); 178 } 179 } 180 /// Returns true if this identifier was \#defined to some value at any 181 /// moment. In this case there should be an entry for the identifier in the 182 /// macro history table in Preprocessor. hadMacroDefinition()183 bool hadMacroDefinition() const { 184 return HadMacro; 185 } 186 187 /// If this is a source-language token (e.g. 'for'), this API 188 /// can be used to cause the lexer to map identifiers to source-language 189 /// tokens. getTokenID()190 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; } 191 192 /// True if revertTokenIDToIdentifier() was called. hasRevertedTokenIDToIdentifier()193 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; } 194 195 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2 196 /// compatibility. 197 /// 198 /// TokenID is normally read-only but there are 2 instances where we revert it 199 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens 200 /// using this method so we can inform serialization about it. revertTokenIDToIdentifier()201 void revertTokenIDToIdentifier() { 202 assert(TokenID != tok::identifier && "Already at tok::identifier"); 203 TokenID = tok::identifier; 204 RevertedTokenID = true; 205 } revertIdentifierToTokenID(tok::TokenKind TK)206 void revertIdentifierToTokenID(tok::TokenKind TK) { 207 assert(TokenID == tok::identifier && "Should be at tok::identifier"); 208 TokenID = TK; 209 RevertedTokenID = false; 210 } 211 212 /// Return the preprocessor keyword ID for this identifier. 213 /// 214 /// For example, "define" will return tok::pp_define. 215 tok::PPKeywordKind getPPKeywordID() const; 216 217 /// Return the Objective-C keyword ID for the this identifier. 218 /// 219 /// For example, 'class' will return tok::objc_class if ObjC is enabled. getObjCKeywordID()220 tok::ObjCKeywordKind getObjCKeywordID() const { 221 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS) 222 return tok::ObjCKeywordKind(ObjCOrBuiltinID); 223 else 224 return tok::objc_not_keyword; 225 } setObjCKeywordID(tok::ObjCKeywordKind ID)226 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; } 227 228 /// Return a value indicating whether this is a builtin function. 229 /// 230 /// 0 is not-built-in. 1+ are specific builtin functions. getBuiltinID()231 unsigned getBuiltinID() const { 232 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS) 233 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS; 234 else 235 return 0; 236 } setBuiltinID(unsigned ID)237 void setBuiltinID(unsigned ID) { 238 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS; 239 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID 240 && "ID too large for field!"); 241 } 242 getObjCOrBuiltinID()243 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; } setObjCOrBuiltinID(unsigned ID)244 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; } 245 246 /// get/setExtension - Initialize information about whether or not this 247 /// language token is an extension. This controls extension warnings, and is 248 /// only valid if a custom token ID is set. isExtensionToken()249 bool isExtensionToken() const { return IsExtension; } setIsExtensionToken(bool Val)250 void setIsExtensionToken(bool Val) { 251 IsExtension = Val; 252 if (Val) 253 NeedsHandleIdentifier = true; 254 else 255 RecomputeNeedsHandleIdentifier(); 256 } 257 258 /// is/setIsFutureCompatKeyword - Initialize information about whether or not 259 /// this language token is a keyword in a newer or proposed Standard. This 260 /// controls compatibility warnings, and is only true when not parsing the 261 /// corresponding Standard. Once a compatibility problem has been diagnosed 262 /// with this keyword, the flag will be cleared. isFutureCompatKeyword()263 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; } setIsFutureCompatKeyword(bool Val)264 void setIsFutureCompatKeyword(bool Val) { 265 IsFutureCompatKeyword = Val; 266 if (Val) 267 NeedsHandleIdentifier = true; 268 else 269 RecomputeNeedsHandleIdentifier(); 270 } 271 272 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the 273 /// Preprocessor will emit an error every time this token is used. 274 void setIsPoisoned(bool Value = true) { 275 IsPoisoned = Value; 276 if (Value) 277 NeedsHandleIdentifier = true; 278 else 279 RecomputeNeedsHandleIdentifier(); 280 } 281 282 /// Return true if this token has been poisoned. isPoisoned()283 bool isPoisoned() const { return IsPoisoned; } 284 285 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether 286 /// this identifier is a C++ alternate representation of an operator. 287 void setIsCPlusPlusOperatorKeyword(bool Val = true) { 288 IsCPPOperatorKeyword = Val; 289 } isCPlusPlusOperatorKeyword()290 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } 291 292 /// Return true if this token is a keyword in the specified language. 293 bool isKeyword(const LangOptions &LangOpts) const; 294 295 /// Return true if this token is a C++ keyword in the specified 296 /// language. 297 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const; 298 299 /// Get and set FETokenInfo. The language front-end is allowed to associate 300 /// arbitrary metadata with this token. getFETokenInfo()301 void *getFETokenInfo() const { return FETokenInfo; } setFETokenInfo(void * T)302 void setFETokenInfo(void *T) { FETokenInfo = T; } 303 304 /// Return true if the Preprocessor::HandleIdentifier must be called 305 /// on a token of this identifier. 306 /// 307 /// If this returns false, we know that HandleIdentifier will not affect 308 /// the token. isHandleIdentifierCase()309 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; } 310 311 /// Return true if the identifier in its current state was loaded 312 /// from an AST file. isFromAST()313 bool isFromAST() const { return IsFromAST; } 314 setIsFromAST()315 void setIsFromAST() { IsFromAST = true; } 316 317 /// Determine whether this identifier has changed since it was loaded 318 /// from an AST file. hasChangedSinceDeserialization()319 bool hasChangedSinceDeserialization() const { 320 return ChangedAfterLoad; 321 } 322 323 /// Note that this identifier has changed since it was loaded from 324 /// an AST file. setChangedSinceDeserialization()325 void setChangedSinceDeserialization() { 326 ChangedAfterLoad = true; 327 } 328 329 /// Determine whether the frontend token information for this 330 /// identifier has changed since it was loaded from an AST file. hasFETokenInfoChangedSinceDeserialization()331 bool hasFETokenInfoChangedSinceDeserialization() const { 332 return FEChangedAfterLoad; 333 } 334 335 /// Note that the frontend token information for this identifier has 336 /// changed since it was loaded from an AST file. setFETokenInfoChangedSinceDeserialization()337 void setFETokenInfoChangedSinceDeserialization() { 338 FEChangedAfterLoad = true; 339 } 340 341 /// Determine whether the information for this identifier is out of 342 /// date with respect to the external source. isOutOfDate()343 bool isOutOfDate() const { return OutOfDate; } 344 345 /// Set whether the information for this identifier is out of 346 /// date with respect to the external source. setOutOfDate(bool OOD)347 void setOutOfDate(bool OOD) { 348 OutOfDate = OOD; 349 if (OOD) 350 NeedsHandleIdentifier = true; 351 else 352 RecomputeNeedsHandleIdentifier(); 353 } 354 355 /// Determine whether this is the contextual keyword \c import. isModulesImport()356 bool isModulesImport() const { return IsModulesImport; } 357 358 /// Set whether this identifier is the contextual keyword \c import. setModulesImport(bool I)359 void setModulesImport(bool I) { 360 IsModulesImport = I; 361 if (I) 362 NeedsHandleIdentifier = true; 363 else 364 RecomputeNeedsHandleIdentifier(); 365 } 366 367 /// Determine whether this is the mangled name of an OpenMP variant. isMangledOpenMPVariantName()368 bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } 369 370 /// Set whether this is the mangled name of an OpenMP variant. setMangledOpenMPVariantName(bool I)371 void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; } 372 373 /// Return true if this identifier is an editor placeholder. 374 /// 375 /// Editor placeholders are produced by the code-completion engine and are 376 /// represented as characters between '<#' and '#>' in the source code. An 377 /// example of auto-completed call with a placeholder parameter is shown 378 /// below: 379 /// \code 380 /// function(<#int x#>); 381 /// \endcode isEditorPlaceholder()382 bool isEditorPlaceholder() const { 383 return getName().startswith("<#") && getName().endswith("#>"); 384 } 385 386 /// Determine whether \p this is a name reserved for the implementation (C99 387 /// 7.1.3, C++ [lib.global.names]). 388 bool isReservedName(bool doubleUnderscoreOnly = false) const { 389 if (getLength() < 2) 390 return false; 391 const char *Name = getNameStart(); 392 return Name[0] == '_' && 393 (Name[1] == '_' || 394 (Name[1] >= 'A' && Name[1] <= 'Z' && !doubleUnderscoreOnly)); 395 } 396 397 /// Provide less than operator for lexicographical sorting. 398 bool operator<(const IdentifierInfo &RHS) const { 399 return getName() < RHS.getName(); 400 } 401 402 private: 403 /// The Preprocessor::HandleIdentifier does several special (but rare) 404 /// things to identifiers of various sorts. For example, it changes the 405 /// \c for keyword token from tok::identifier to tok::for. 406 /// 407 /// This method is very tied to the definition of HandleIdentifier. Any 408 /// change to it should be reflected here. RecomputeNeedsHandleIdentifier()409 void RecomputeNeedsHandleIdentifier() { 410 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() || 411 isExtensionToken() || isFutureCompatKeyword() || 412 isOutOfDate() || isModulesImport(); 413 } 414 }; 415 416 /// An RAII object for [un]poisoning an identifier within a scope. 417 /// 418 /// \p II is allowed to be null, in which case objects of this type have 419 /// no effect. 420 class PoisonIdentifierRAIIObject { 421 IdentifierInfo *const II; 422 const bool OldValue; 423 424 public: PoisonIdentifierRAIIObject(IdentifierInfo * II,bool NewValue)425 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue) 426 : II(II), OldValue(II ? II->isPoisoned() : false) { 427 if(II) 428 II->setIsPoisoned(NewValue); 429 } 430 ~PoisonIdentifierRAIIObject()431 ~PoisonIdentifierRAIIObject() { 432 if(II) 433 II->setIsPoisoned(OldValue); 434 } 435 }; 436 437 /// An iterator that walks over all of the known identifiers 438 /// in the lookup table. 439 /// 440 /// Since this iterator uses an abstract interface via virtual 441 /// functions, it uses an object-oriented interface rather than the 442 /// more standard C++ STL iterator interface. In this OO-style 443 /// iteration, the single function \c Next() provides dereference, 444 /// advance, and end-of-sequence checking in a single 445 /// operation. Subclasses of this iterator type will provide the 446 /// actual functionality. 447 class IdentifierIterator { 448 protected: 449 IdentifierIterator() = default; 450 451 public: 452 IdentifierIterator(const IdentifierIterator &) = delete; 453 IdentifierIterator &operator=(const IdentifierIterator &) = delete; 454 455 virtual ~IdentifierIterator(); 456 457 /// Retrieve the next string in the identifier table and 458 /// advances the iterator for the following string. 459 /// 460 /// \returns The next string in the identifier table. If there is 461 /// no such string, returns an empty \c StringRef. 462 virtual StringRef Next() = 0; 463 }; 464 465 /// Provides lookups to, and iteration over, IdentiferInfo objects. 466 class IdentifierInfoLookup { 467 public: 468 virtual ~IdentifierInfoLookup(); 469 470 /// Return the IdentifierInfo for the specified named identifier. 471 /// 472 /// Unlike the version in IdentifierTable, this returns a pointer instead 473 /// of a reference. If the pointer is null then the IdentifierInfo cannot 474 /// be found. 475 virtual IdentifierInfo* get(StringRef Name) = 0; 476 477 /// Retrieve an iterator into the set of all identifiers 478 /// known to this identifier lookup source. 479 /// 480 /// This routine provides access to all of the identifiers known to 481 /// the identifier lookup, allowing access to the contents of the 482 /// identifiers without introducing the overhead of constructing 483 /// IdentifierInfo objects for each. 484 /// 485 /// \returns A new iterator into the set of known identifiers. The 486 /// caller is responsible for deleting this iterator. 487 virtual IdentifierIterator *getIdentifiers(); 488 }; 489 490 /// Implements an efficient mapping from strings to IdentifierInfo nodes. 491 /// 492 /// This has no other purpose, but this is an extremely performance-critical 493 /// piece of the code, as each occurrence of every identifier goes through 494 /// here when lexed. 495 class IdentifierTable { 496 // Shark shows that using MallocAllocator is *much* slower than using this 497 // BumpPtrAllocator! 498 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>; 499 HashTableTy HashTable; 500 501 IdentifierInfoLookup* ExternalLookup; 502 503 public: 504 /// Create the identifier table. 505 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr); 506 507 /// Create the identifier table, populating it with info about the 508 /// language keywords for the language specified by \p LangOpts. 509 explicit IdentifierTable(const LangOptions &LangOpts, 510 IdentifierInfoLookup *ExternalLookup = nullptr); 511 512 /// Set the external identifier lookup mechanism. setExternalIdentifierLookup(IdentifierInfoLookup * IILookup)513 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) { 514 ExternalLookup = IILookup; 515 } 516 517 /// Retrieve the external identifier lookup object, if any. getExternalIdentifierLookup()518 IdentifierInfoLookup *getExternalIdentifierLookup() const { 519 return ExternalLookup; 520 } 521 getAllocator()522 llvm::BumpPtrAllocator& getAllocator() { 523 return HashTable.getAllocator(); 524 } 525 526 /// Return the identifier token info for the specified named 527 /// identifier. get(StringRef Name)528 IdentifierInfo &get(StringRef Name) { 529 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first; 530 531 IdentifierInfo *&II = Entry.second; 532 if (II) return *II; 533 534 // No entry; if we have an external lookup, look there first. 535 if (ExternalLookup) { 536 II = ExternalLookup->get(Name); 537 if (II) 538 return *II; 539 } 540 541 // Lookups failed, make a new IdentifierInfo. 542 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 543 II = new (Mem) IdentifierInfo(); 544 545 // Make sure getName() knows how to find the IdentifierInfo 546 // contents. 547 II->Entry = &Entry; 548 549 return *II; 550 } 551 get(StringRef Name,tok::TokenKind TokenCode)552 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) { 553 IdentifierInfo &II = get(Name); 554 II.TokenID = TokenCode; 555 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large"); 556 return II; 557 } 558 559 /// Gets an IdentifierInfo for the given name without consulting 560 /// external sources. 561 /// 562 /// This is a version of get() meant for external sources that want to 563 /// introduce or modify an identifier. If they called get(), they would 564 /// likely end up in a recursion. getOwn(StringRef Name)565 IdentifierInfo &getOwn(StringRef Name) { 566 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first; 567 568 IdentifierInfo *&II = Entry.second; 569 if (II) 570 return *II; 571 572 // Lookups failed, make a new IdentifierInfo. 573 void *Mem = getAllocator().Allocate<IdentifierInfo>(); 574 II = new (Mem) IdentifierInfo(); 575 576 // Make sure getName() knows how to find the IdentifierInfo 577 // contents. 578 II->Entry = &Entry; 579 580 // If this is the 'import' contextual keyword, mark it as such. 581 if (Name.equals("import")) 582 II->setModulesImport(true); 583 584 return *II; 585 } 586 587 using iterator = HashTableTy::const_iterator; 588 using const_iterator = HashTableTy::const_iterator; 589 begin()590 iterator begin() const { return HashTable.begin(); } end()591 iterator end() const { return HashTable.end(); } size()592 unsigned size() const { return HashTable.size(); } 593 find(StringRef Name)594 iterator find(StringRef Name) const { return HashTable.find(Name); } 595 596 /// Print some statistics to stderr that indicate how well the 597 /// hashing is doing. 598 void PrintStats() const; 599 600 /// Populate the identifier table with info about the language keywords 601 /// for the language specified by \p LangOpts. 602 void AddKeywords(const LangOptions &LangOpts); 603 }; 604 605 /// A family of Objective-C methods. 606 /// 607 /// These families have no inherent meaning in the language, but are 608 /// nonetheless central enough in the existing implementations to 609 /// merit direct AST support. While, in theory, arbitrary methods can 610 /// be considered to form families, we focus here on the methods 611 /// involving allocation and retain-count management, as these are the 612 /// most "core" and the most likely to be useful to diverse clients 613 /// without extra information. 614 /// 615 /// Both selectors and actual method declarations may be classified 616 /// into families. Method families may impose additional restrictions 617 /// beyond their selector name; for example, a method called '_init' 618 /// that returns void is not considered to be in the 'init' family 619 /// (but would be if it returned 'id'). It is also possible to 620 /// explicitly change or remove a method's family. Therefore the 621 /// method's family should be considered the single source of truth. 622 enum ObjCMethodFamily { 623 /// No particular method family. 624 OMF_None, 625 626 // Selectors in these families may have arbitrary arity, may be 627 // written with arbitrary leading underscores, and may have 628 // additional CamelCase "words" in their first selector chunk 629 // following the family name. 630 OMF_alloc, 631 OMF_copy, 632 OMF_init, 633 OMF_mutableCopy, 634 OMF_new, 635 636 // These families are singletons consisting only of the nullary 637 // selector with the given name. 638 OMF_autorelease, 639 OMF_dealloc, 640 OMF_finalize, 641 OMF_release, 642 OMF_retain, 643 OMF_retainCount, 644 OMF_self, 645 OMF_initialize, 646 647 // performSelector families 648 OMF_performSelector 649 }; 650 651 /// Enough bits to store any enumerator in ObjCMethodFamily or 652 /// InvalidObjCMethodFamily. 653 enum { ObjCMethodFamilyBitWidth = 4 }; 654 655 /// An invalid value of ObjCMethodFamily. 656 enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 }; 657 658 /// A family of Objective-C methods. 659 /// 660 /// These are family of methods whose result type is initially 'id', but 661 /// but are candidate for the result type to be changed to 'instancetype'. 662 enum ObjCInstanceTypeFamily { 663 OIT_None, 664 OIT_Array, 665 OIT_Dictionary, 666 OIT_Singleton, 667 OIT_Init, 668 OIT_ReturnsSelf 669 }; 670 671 enum ObjCStringFormatFamily { 672 SFF_None, 673 SFF_NSString, 674 SFF_CFString 675 }; 676 677 /// Smart pointer class that efficiently represents Objective-C method 678 /// names. 679 /// 680 /// This class will either point to an IdentifierInfo or a 681 /// MultiKeywordSelector (which is private). This enables us to optimize 682 /// selectors that take no arguments and selectors that take 1 argument, which 683 /// accounts for 78% of all selectors in Cocoa.h. 684 class Selector { 685 friend class Diagnostic; 686 friend class SelectorTable; // only the SelectorTable can create these 687 friend class DeclarationName; // and the AST's DeclarationName. 688 689 enum IdentifierInfoFlag { 690 // Empty selector = 0. Note that these enumeration values must 691 // correspond to the enumeration values of DeclarationName::StoredNameKind 692 ZeroArg = 0x01, 693 OneArg = 0x02, 694 MultiArg = 0x07, 695 ArgFlags = 0x07 696 }; 697 698 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low 699 /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any 700 /// case IdentifierInfo and MultiKeywordSelector are already aligned to 701 /// 8 bytes even on 32 bits archs because of DeclarationName. 702 uintptr_t InfoPtr = 0; 703 Selector(IdentifierInfo * II,unsigned nArgs)704 Selector(IdentifierInfo *II, unsigned nArgs) { 705 InfoPtr = reinterpret_cast<uintptr_t>(II); 706 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 707 assert(nArgs < 2 && "nArgs not equal to 0/1"); 708 InfoPtr |= nArgs+1; 709 } 710 Selector(MultiKeywordSelector * SI)711 Selector(MultiKeywordSelector *SI) { 712 InfoPtr = reinterpret_cast<uintptr_t>(SI); 713 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo"); 714 InfoPtr |= MultiArg; 715 } 716 getAsIdentifierInfo()717 IdentifierInfo *getAsIdentifierInfo() const { 718 if (getIdentifierInfoFlag() < MultiArg) 719 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags); 720 return nullptr; 721 } 722 getMultiKeywordSelector()723 MultiKeywordSelector *getMultiKeywordSelector() const { 724 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags); 725 } 726 getIdentifierInfoFlag()727 unsigned getIdentifierInfoFlag() const { 728 return InfoPtr & ArgFlags; 729 } 730 731 static ObjCMethodFamily getMethodFamilyImpl(Selector sel); 732 733 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel); 734 735 public: 736 /// The default ctor should only be used when creating data structures that 737 /// will contain selectors. 738 Selector() = default; Selector(uintptr_t V)739 explicit Selector(uintptr_t V) : InfoPtr(V) {} 740 741 /// operator==/!= - Indicate whether the specified selectors are identical. 742 bool operator==(Selector RHS) const { 743 return InfoPtr == RHS.InfoPtr; 744 } 745 bool operator!=(Selector RHS) const { 746 return InfoPtr != RHS.InfoPtr; 747 } 748 getAsOpaquePtr()749 void *getAsOpaquePtr() const { 750 return reinterpret_cast<void*>(InfoPtr); 751 } 752 753 /// Determine whether this is the empty selector. isNull()754 bool isNull() const { return InfoPtr == 0; } 755 756 // Predicates to identify the selector type. isKeywordSelector()757 bool isKeywordSelector() const { 758 return getIdentifierInfoFlag() != ZeroArg; 759 } 760 isUnarySelector()761 bool isUnarySelector() const { 762 return getIdentifierInfoFlag() == ZeroArg; 763 } 764 765 /// If this selector is the specific keyword selector described by Names. 766 bool isKeywordSelector(ArrayRef<StringRef> Names) const; 767 768 /// If this selector is the specific unary selector described by Name. 769 bool isUnarySelector(StringRef Name) const; 770 771 unsigned getNumArgs() const; 772 773 /// Retrieve the identifier at a given position in the selector. 774 /// 775 /// Note that the identifier pointer returned may be NULL. Clients that only 776 /// care about the text of the identifier string, and not the specific, 777 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns 778 /// an empty string when the identifier pointer would be NULL. 779 /// 780 /// \param argIndex The index for which we want to retrieve the identifier. 781 /// This index shall be less than \c getNumArgs() unless this is a keyword 782 /// selector, in which case 0 is the only permissible value. 783 /// 784 /// \returns the uniqued identifier for this slot, or NULL if this slot has 785 /// no corresponding identifier. 786 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const; 787 788 /// Retrieve the name at a given position in the selector. 789 /// 790 /// \param argIndex The index for which we want to retrieve the name. 791 /// This index shall be less than \c getNumArgs() unless this is a keyword 792 /// selector, in which case 0 is the only permissible value. 793 /// 794 /// \returns the name for this slot, which may be the empty string if no 795 /// name was supplied. 796 StringRef getNameForSlot(unsigned argIndex) const; 797 798 /// Derive the full selector name (e.g. "foo:bar:") and return 799 /// it as an std::string. 800 std::string getAsString() const; 801 802 /// Prints the full selector name (e.g. "foo:bar:"). 803 void print(llvm::raw_ostream &OS) const; 804 805 void dump() const; 806 807 /// Derive the conventional family of this method. getMethodFamily()808 ObjCMethodFamily getMethodFamily() const { 809 return getMethodFamilyImpl(*this); 810 } 811 getStringFormatFamily()812 ObjCStringFormatFamily getStringFormatFamily() const { 813 return getStringFormatFamilyImpl(*this); 814 } 815 getEmptyMarker()816 static Selector getEmptyMarker() { 817 return Selector(uintptr_t(-1)); 818 } 819 getTombstoneMarker()820 static Selector getTombstoneMarker() { 821 return Selector(uintptr_t(-2)); 822 } 823 824 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel); 825 }; 826 827 /// This table allows us to fully hide how we implement 828 /// multi-keyword caching. 829 class SelectorTable { 830 // Actually a SelectorTableImpl 831 void *Impl; 832 833 public: 834 SelectorTable(); 835 SelectorTable(const SelectorTable &) = delete; 836 SelectorTable &operator=(const SelectorTable &) = delete; 837 ~SelectorTable(); 838 839 /// Can create any sort of selector. 840 /// 841 /// \p NumArgs indicates whether this is a no argument selector "foo", a 842 /// single argument selector "foo:" or multi-argument "foo:bar:". 843 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV); 844 getUnarySelector(IdentifierInfo * ID)845 Selector getUnarySelector(IdentifierInfo *ID) { 846 return Selector(ID, 1); 847 } 848 getNullarySelector(IdentifierInfo * ID)849 Selector getNullarySelector(IdentifierInfo *ID) { 850 return Selector(ID, 0); 851 } 852 853 /// Return the total amount of memory allocated for managing selectors. 854 size_t getTotalMemory() const; 855 856 /// Return the default setter name for the given identifier. 857 /// 858 /// This is "set" + \p Name where the initial character of \p Name 859 /// has been capitalized. 860 static SmallString<64> constructSetterName(StringRef Name); 861 862 /// Return the default setter selector for the given identifier. 863 /// 864 /// This is "set" + \p Name where the initial character of \p Name 865 /// has been capitalized. 866 static Selector constructSetterSelector(IdentifierTable &Idents, 867 SelectorTable &SelTable, 868 const IdentifierInfo *Name); 869 870 /// Return the property name for the given setter selector. 871 static std::string getPropertyNameFromSetterSelector(Selector Sel); 872 }; 873 874 namespace detail { 875 876 /// DeclarationNameExtra is used as a base of various uncommon special names. 877 /// This class is needed since DeclarationName has not enough space to store 878 /// the kind of every possible names. Therefore the kind of common names is 879 /// stored directly in DeclarationName, and the kind of uncommon names is 880 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because 881 /// DeclarationName needs the lower 3 bits to store the kind of common names. 882 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change 883 /// here is very likely to require changes in DeclarationName(Table). 884 class alignas(IdentifierInfoAlignment) DeclarationNameExtra { 885 friend class clang::DeclarationName; 886 friend class clang::DeclarationNameTable; 887 888 protected: 889 /// The kind of "extra" information stored in the DeclarationName. See 890 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values 891 /// are used. Note that DeclarationName depends on the numerical values 892 /// of the enumerators in this enum. See DeclarationName::StoredNameKind 893 /// for more info. 894 enum ExtraKind { 895 CXXDeductionGuideName, 896 CXXLiteralOperatorName, 897 CXXUsingDirective, 898 ObjCMultiArgSelector 899 }; 900 901 /// ExtraKindOrNumArgs has one of the following meaning: 902 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra 903 /// is in this case in fact either a CXXDeductionGuideNameExtra or 904 /// a CXXLiteralOperatorIdName. 905 /// 906 /// * It may be also name common to C++ using-directives (CXXUsingDirective), 907 /// 908 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is 909 /// the number of arguments in the Objective-C selector, in which 910 /// case the DeclarationNameExtra is also a MultiKeywordSelector. 911 unsigned ExtraKindOrNumArgs; 912 DeclarationNameExtra(ExtraKind Kind)913 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {} DeclarationNameExtra(unsigned NumArgs)914 DeclarationNameExtra(unsigned NumArgs) 915 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {} 916 917 /// Return the corresponding ExtraKind. getKind()918 ExtraKind getKind() const { 919 return static_cast<ExtraKind>(ExtraKindOrNumArgs > 920 (unsigned)ObjCMultiArgSelector 921 ? (unsigned)ObjCMultiArgSelector 922 : ExtraKindOrNumArgs); 923 } 924 925 /// Return the number of arguments in an ObjC selector. Only valid when this 926 /// is indeed an ObjCMultiArgSelector. getNumArgs()927 unsigned getNumArgs() const { 928 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector && 929 "getNumArgs called but this is not an ObjC selector!"); 930 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector; 931 } 932 }; 933 934 } // namespace detail 935 936 } // namespace clang 937 938 namespace llvm { 939 940 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and 941 /// DenseSets. 942 template <> 943 struct DenseMapInfo<clang::Selector> { 944 static clang::Selector getEmptyKey() { 945 return clang::Selector::getEmptyMarker(); 946 } 947 948 static clang::Selector getTombstoneKey() { 949 return clang::Selector::getTombstoneMarker(); 950 } 951 952 static unsigned getHashValue(clang::Selector S); 953 954 static bool isEqual(clang::Selector LHS, clang::Selector RHS) { 955 return LHS == RHS; 956 } 957 }; 958 959 template<> 960 struct PointerLikeTypeTraits<clang::Selector> { 961 static const void *getAsVoidPointer(clang::Selector P) { 962 return P.getAsOpaquePtr(); 963 } 964 965 static clang::Selector getFromVoidPointer(const void *P) { 966 return clang::Selector(reinterpret_cast<uintptr_t>(P)); 967 } 968 969 static constexpr int NumLowBitsAvailable = 0; 970 }; 971 972 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which 973 // are not guaranteed to be 8-byte aligned. 974 template<> 975 struct PointerLikeTypeTraits<clang::IdentifierInfo*> { 976 static void *getAsVoidPointer(clang::IdentifierInfo* P) { 977 return P; 978 } 979 980 static clang::IdentifierInfo *getFromVoidPointer(void *P) { 981 return static_cast<clang::IdentifierInfo*>(P); 982 } 983 984 static constexpr int NumLowBitsAvailable = 1; 985 }; 986 987 template<> 988 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> { 989 static const void *getAsVoidPointer(const clang::IdentifierInfo* P) { 990 return P; 991 } 992 993 static const clang::IdentifierInfo *getFromVoidPointer(const void *P) { 994 return static_cast<const clang::IdentifierInfo*>(P); 995 } 996 997 static constexpr int NumLowBitsAvailable = 1; 998 }; 999 1000 } // namespace llvm 1001 1002 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H 1003