1 //===-- Twine.h - Fast Temporary String Concatenation -----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_ADT_TWINE_H 11 #define LLVM_ADT_TWINE_H 12 13 #include "llvm/ADT/SmallVector.h" 14 #include "llvm/ADT/StringRef.h" 15 #include "llvm/Support/DataTypes.h" 16 #include "llvm/Support/ErrorHandling.h" 17 #include <cassert> 18 #include <string> 19 20 namespace llvm { 21 class raw_ostream; 22 23 /// Twine - A lightweight data structure for efficiently representing the 24 /// concatenation of temporary values as strings. 25 /// 26 /// A Twine is a kind of rope, it represents a concatenated string using a 27 /// binary-tree, where the string is the preorder of the nodes. Since the 28 /// Twine can be efficiently rendered into a buffer when its result is used, 29 /// it avoids the cost of generating temporary values for intermediate string 30 /// results -- particularly in cases when the Twine result is never 31 /// required. By explicitly tracking the type of leaf nodes, we can also avoid 32 /// the creation of temporary strings for conversions operations (such as 33 /// appending an integer to a string). 34 /// 35 /// A Twine is not intended for use directly and should not be stored, its 36 /// implementation relies on the ability to store pointers to temporary stack 37 /// objects which may be deallocated at the end of a statement. Twines should 38 /// only be used accepted as const references in arguments, when an API wishes 39 /// to accept possibly-concatenated strings. 40 /// 41 /// Twines support a special 'null' value, which always concatenates to form 42 /// itself, and renders as an empty string. This can be returned from APIs to 43 /// effectively nullify any concatenations performed on the result. 44 /// 45 /// \b Implementation 46 /// 47 /// Given the nature of a Twine, it is not possible for the Twine's 48 /// concatenation method to construct interior nodes; the result must be 49 /// represented inside the returned value. For this reason a Twine object 50 /// actually holds two values, the left- and right-hand sides of a 51 /// concatenation. We also have nullary Twine objects, which are effectively 52 /// sentinel values that represent empty strings. 53 /// 54 /// Thus, a Twine can effectively have zero, one, or two children. The \see 55 /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for 56 /// testing the number of children. 57 /// 58 /// We maintain a number of invariants on Twine objects (FIXME: Why): 59 /// - Nullary twines are always represented with their Kind on the left-hand 60 /// side, and the Empty kind on the right-hand side. 61 /// - Unary twines are always represented with the value on the left-hand 62 /// side, and the Empty kind on the right-hand side. 63 /// - If a Twine has another Twine as a child, that child should always be 64 /// binary (otherwise it could have been folded into the parent). 65 /// 66 /// These invariants are check by \see isValid(). 67 /// 68 /// \b Efficiency Considerations 69 /// 70 /// The Twine is designed to yield efficient and small code for common 71 /// situations. For this reason, the concat() method is inlined so that 72 /// concatenations of leaf nodes can be optimized into stores directly into a 73 /// single stack allocated object. 74 /// 75 /// In practice, not all compilers can be trusted to optimize concat() fully, 76 /// so we provide two additional methods (and accompanying operator+ 77 /// overloads) to guarantee that particularly important cases (cstring plus 78 /// StringRef) codegen as desired. 79 class Twine { 80 /// NodeKind - Represent the type of an argument. 81 enum NodeKind : unsigned char { 82 /// An empty string; the result of concatenating anything with it is also 83 /// empty. 84 NullKind, 85 86 /// The empty string. 87 EmptyKind, 88 89 /// A pointer to a Twine instance. 90 TwineKind, 91 92 /// A pointer to a C string instance. 93 CStringKind, 94 95 /// A pointer to an std::string instance. 96 StdStringKind, 97 98 /// A pointer to a StringRef instance. 99 StringRefKind, 100 101 /// A pointer to a SmallString instance. 102 SmallStringKind, 103 104 /// A char value reinterpreted as a pointer, to render as a character. 105 CharKind, 106 107 /// An unsigned int value reinterpreted as a pointer, to render as an 108 /// unsigned decimal integer. 109 DecUIKind, 110 111 /// An int value reinterpreted as a pointer, to render as a signed 112 /// decimal integer. 113 DecIKind, 114 115 /// A pointer to an unsigned long value, to render as an unsigned decimal 116 /// integer. 117 DecULKind, 118 119 /// A pointer to a long value, to render as a signed decimal integer. 120 DecLKind, 121 122 /// A pointer to an unsigned long long value, to render as an unsigned 123 /// decimal integer. 124 DecULLKind, 125 126 /// A pointer to a long long value, to render as a signed decimal integer. 127 DecLLKind, 128 129 /// A pointer to a uint64_t value, to render as an unsigned hexadecimal 130 /// integer. 131 UHexKind 132 }; 133 134 union Child 135 { 136 const Twine *twine; 137 const char *cString; 138 const std::string *stdString; 139 const StringRef *stringRef; 140 const SmallVectorImpl<char> *smallString; 141 char character; 142 unsigned int decUI; 143 int decI; 144 const unsigned long *decUL; 145 const long *decL; 146 const unsigned long long *decULL; 147 const long long *decLL; 148 const uint64_t *uHex; 149 }; 150 151 private: 152 /// LHS - The prefix in the concatenation, which may be uninitialized for 153 /// Null or Empty kinds. 154 Child LHS; 155 /// RHS - The suffix in the concatenation, which may be uninitialized for 156 /// Null or Empty kinds. 157 Child RHS; 158 /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). 159 NodeKind LHSKind; 160 /// RHSKind - The NodeKind of the right hand side, \see getRHSKind(). 161 NodeKind RHSKind; 162 163 private: 164 /// Construct a nullary twine; the kind must be NullKind or EmptyKind. Twine(NodeKind Kind)165 explicit Twine(NodeKind Kind) 166 : LHSKind(Kind), RHSKind(EmptyKind) { 167 assert(isNullary() && "Invalid kind!"); 168 } 169 170 /// Construct a binary twine. Twine(const Twine & LHS,const Twine & RHS)171 explicit Twine(const Twine &LHS, const Twine &RHS) 172 : LHSKind(TwineKind), RHSKind(TwineKind) { 173 this->LHS.twine = &LHS; 174 this->RHS.twine = &RHS; 175 assert(isValid() && "Invalid twine!"); 176 } 177 178 /// Construct a twine from explicit values. Twine(Child LHS,NodeKind LHSKind,Child RHS,NodeKind RHSKind)179 explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind) 180 : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) { 181 assert(isValid() && "Invalid twine!"); 182 } 183 184 /// Since the intended use of twines is as temporary objects, assignments 185 /// when concatenating might cause undefined behavior or stack corruptions 186 Twine &operator=(const Twine &Other) = delete; 187 188 /// Check for the null twine. isNull()189 bool isNull() const { 190 return getLHSKind() == NullKind; 191 } 192 193 /// Check for the empty twine. isEmpty()194 bool isEmpty() const { 195 return getLHSKind() == EmptyKind; 196 } 197 198 /// Check if this is a nullary twine (null or empty). isNullary()199 bool isNullary() const { 200 return isNull() || isEmpty(); 201 } 202 203 /// Check if this is a unary twine. isUnary()204 bool isUnary() const { 205 return getRHSKind() == EmptyKind && !isNullary(); 206 } 207 208 /// Check if this is a binary twine. isBinary()209 bool isBinary() const { 210 return getLHSKind() != NullKind && getRHSKind() != EmptyKind; 211 } 212 213 /// Check if this is a valid twine (satisfying the invariants on 214 /// order and number of arguments). isValid()215 bool isValid() const { 216 // Nullary twines always have Empty on the RHS. 217 if (isNullary() && getRHSKind() != EmptyKind) 218 return false; 219 220 // Null should never appear on the RHS. 221 if (getRHSKind() == NullKind) 222 return false; 223 224 // The RHS cannot be non-empty if the LHS is empty. 225 if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) 226 return false; 227 228 // A twine child should always be binary. 229 if (getLHSKind() == TwineKind && 230 !LHS.twine->isBinary()) 231 return false; 232 if (getRHSKind() == TwineKind && 233 !RHS.twine->isBinary()) 234 return false; 235 236 return true; 237 } 238 239 /// Get the NodeKind of the left-hand side. getLHSKind()240 NodeKind getLHSKind() const { return LHSKind; } 241 242 /// Get the NodeKind of the right-hand side. getRHSKind()243 NodeKind getRHSKind() const { return RHSKind; } 244 245 /// Print one child from a twine. 246 void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const; 247 248 /// Print the representation of one child from a twine. 249 void printOneChildRepr(raw_ostream &OS, Child Ptr, 250 NodeKind Kind) const; 251 252 public: 253 /// @name Constructors 254 /// @{ 255 256 /// Construct from an empty string. Twine()257 /*implicit*/ Twine() : LHSKind(EmptyKind), RHSKind(EmptyKind) { 258 assert(isValid() && "Invalid twine!"); 259 } 260 261 Twine(const Twine &) = default; 262 263 /// Construct from a C string. 264 /// 265 /// We take care here to optimize "" into the empty twine -- this will be 266 /// optimized out for string constants. This allows Twine arguments have 267 /// default "" values, without introducing unnecessary string constants. Twine(const char * Str)268 /*implicit*/ Twine(const char *Str) 269 : RHSKind(EmptyKind) { 270 if (Str[0] != '\0') { 271 LHS.cString = Str; 272 LHSKind = CStringKind; 273 } else 274 LHSKind = EmptyKind; 275 276 assert(isValid() && "Invalid twine!"); 277 } 278 279 /// Construct from an std::string. Twine(const std::string & Str)280 /*implicit*/ Twine(const std::string &Str) 281 : LHSKind(StdStringKind), RHSKind(EmptyKind) { 282 LHS.stdString = &Str; 283 assert(isValid() && "Invalid twine!"); 284 } 285 286 /// Construct from a StringRef. Twine(const StringRef & Str)287 /*implicit*/ Twine(const StringRef &Str) 288 : LHSKind(StringRefKind), RHSKind(EmptyKind) { 289 LHS.stringRef = &Str; 290 assert(isValid() && "Invalid twine!"); 291 } 292 293 /// Construct from a SmallString. Twine(const SmallVectorImpl<char> & Str)294 /*implicit*/ Twine(const SmallVectorImpl<char> &Str) 295 : LHSKind(SmallStringKind), RHSKind(EmptyKind) { 296 LHS.smallString = &Str; 297 assert(isValid() && "Invalid twine!"); 298 } 299 300 /// Construct from a char. Twine(char Val)301 explicit Twine(char Val) 302 : LHSKind(CharKind), RHSKind(EmptyKind) { 303 LHS.character = Val; 304 } 305 306 /// Construct from a signed char. Twine(signed char Val)307 explicit Twine(signed char Val) 308 : LHSKind(CharKind), RHSKind(EmptyKind) { 309 LHS.character = static_cast<char>(Val); 310 } 311 312 /// Construct from an unsigned char. Twine(unsigned char Val)313 explicit Twine(unsigned char Val) 314 : LHSKind(CharKind), RHSKind(EmptyKind) { 315 LHS.character = static_cast<char>(Val); 316 } 317 318 /// Construct a twine to print \p Val as an unsigned decimal integer. Twine(unsigned Val)319 explicit Twine(unsigned Val) 320 : LHSKind(DecUIKind), RHSKind(EmptyKind) { 321 LHS.decUI = Val; 322 } 323 324 /// Construct a twine to print \p Val as a signed decimal integer. Twine(int Val)325 explicit Twine(int Val) 326 : LHSKind(DecIKind), RHSKind(EmptyKind) { 327 LHS.decI = Val; 328 } 329 330 /// Construct a twine to print \p Val as an unsigned decimal integer. Twine(const unsigned long & Val)331 explicit Twine(const unsigned long &Val) 332 : LHSKind(DecULKind), RHSKind(EmptyKind) { 333 LHS.decUL = &Val; 334 } 335 336 /// Construct a twine to print \p Val as a signed decimal integer. Twine(const long & Val)337 explicit Twine(const long &Val) 338 : LHSKind(DecLKind), RHSKind(EmptyKind) { 339 LHS.decL = &Val; 340 } 341 342 /// Construct a twine to print \p Val as an unsigned decimal integer. Twine(const unsigned long long & Val)343 explicit Twine(const unsigned long long &Val) 344 : LHSKind(DecULLKind), RHSKind(EmptyKind) { 345 LHS.decULL = &Val; 346 } 347 348 /// Construct a twine to print \p Val as a signed decimal integer. Twine(const long long & Val)349 explicit Twine(const long long &Val) 350 : LHSKind(DecLLKind), RHSKind(EmptyKind) { 351 LHS.decLL = &Val; 352 } 353 354 // FIXME: Unfortunately, to make sure this is as efficient as possible we 355 // need extra binary constructors from particular types. We can't rely on 356 // the compiler to be smart enough to fold operator+()/concat() down to the 357 // right thing. Yet. 358 359 /// Construct as the concatenation of a C string and a StringRef. Twine(const char * LHS,const StringRef & RHS)360 /*implicit*/ Twine(const char *LHS, const StringRef &RHS) 361 : LHSKind(CStringKind), RHSKind(StringRefKind) { 362 this->LHS.cString = LHS; 363 this->RHS.stringRef = &RHS; 364 assert(isValid() && "Invalid twine!"); 365 } 366 367 /// Construct as the concatenation of a StringRef and a C string. Twine(const StringRef & LHS,const char * RHS)368 /*implicit*/ Twine(const StringRef &LHS, const char *RHS) 369 : LHSKind(StringRefKind), RHSKind(CStringKind) { 370 this->LHS.stringRef = &LHS; 371 this->RHS.cString = RHS; 372 assert(isValid() && "Invalid twine!"); 373 } 374 375 /// Create a 'null' string, which is an empty string that always 376 /// concatenates to form another empty string. createNull()377 static Twine createNull() { 378 return Twine(NullKind); 379 } 380 381 /// @} 382 /// @name Numeric Conversions 383 /// @{ 384 385 // Construct a twine to print \p Val as an unsigned hexadecimal integer. utohexstr(const uint64_t & Val)386 static Twine utohexstr(const uint64_t &Val) { 387 Child LHS, RHS; 388 LHS.uHex = &Val; 389 RHS.twine = nullptr; 390 return Twine(LHS, UHexKind, RHS, EmptyKind); 391 } 392 393 /// @} 394 /// @name Predicate Operations 395 /// @{ 396 397 /// Check if this twine is trivially empty; a false return value does not 398 /// necessarily mean the twine is empty. isTriviallyEmpty()399 bool isTriviallyEmpty() const { 400 return isNullary(); 401 } 402 403 /// Return true if this twine can be dynamically accessed as a single 404 /// StringRef value with getSingleStringRef(). isSingleStringRef()405 bool isSingleStringRef() const { 406 if (getRHSKind() != EmptyKind) return false; 407 408 switch (getLHSKind()) { 409 case EmptyKind: 410 case CStringKind: 411 case StdStringKind: 412 case StringRefKind: 413 case SmallStringKind: 414 return true; 415 default: 416 return false; 417 } 418 } 419 420 /// @} 421 /// @name String Operations 422 /// @{ 423 424 Twine concat(const Twine &Suffix) const; 425 426 /// @} 427 /// @name Output & Conversion. 428 /// @{ 429 430 /// Return the twine contents as a std::string. 431 std::string str() const; 432 433 /// Append the concatenated string into the given SmallString or SmallVector. 434 void toVector(SmallVectorImpl<char> &Out) const; 435 436 /// This returns the twine as a single StringRef. This method is only valid 437 /// if isSingleStringRef() is true. getSingleStringRef()438 StringRef getSingleStringRef() const { 439 assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); 440 switch (getLHSKind()) { 441 default: llvm_unreachable("Out of sync with isSingleStringRef"); 442 case EmptyKind: return StringRef(); 443 case CStringKind: return StringRef(LHS.cString); 444 case StdStringKind: return StringRef(*LHS.stdString); 445 case StringRefKind: return *LHS.stringRef; 446 case SmallStringKind: 447 return StringRef(LHS.smallString->data(), LHS.smallString->size()); 448 } 449 } 450 451 /// This returns the twine as a single StringRef if it can be 452 /// represented as such. Otherwise the twine is written into the given 453 /// SmallVector and a StringRef to the SmallVector's data is returned. toStringRef(SmallVectorImpl<char> & Out)454 StringRef toStringRef(SmallVectorImpl<char> &Out) const { 455 if (isSingleStringRef()) 456 return getSingleStringRef(); 457 toVector(Out); 458 return StringRef(Out.data(), Out.size()); 459 } 460 461 /// This returns the twine as a single null terminated StringRef if it 462 /// can be represented as such. Otherwise the twine is written into the 463 /// given SmallVector and a StringRef to the SmallVector's data is returned. 464 /// 465 /// The returned StringRef's size does not include the null terminator. 466 StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const; 467 468 /// Write the concatenated string represented by this twine to the 469 /// stream \p OS. 470 void print(raw_ostream &OS) const; 471 472 /// Dump the concatenated string represented by this twine to stderr. 473 void dump() const; 474 475 /// Write the representation of this twine to the stream \p OS. 476 void printRepr(raw_ostream &OS) const; 477 478 /// Dump the representation of this twine to stderr. 479 void dumpRepr() const; 480 481 /// @} 482 }; 483 484 /// @name Twine Inline Implementations 485 /// @{ 486 concat(const Twine & Suffix)487 inline Twine Twine::concat(const Twine &Suffix) const { 488 // Concatenation with null is null. 489 if (isNull() || Suffix.isNull()) 490 return Twine(NullKind); 491 492 // Concatenation with empty yields the other side. 493 if (isEmpty()) 494 return Suffix; 495 if (Suffix.isEmpty()) 496 return *this; 497 498 // Otherwise we need to create a new node, taking care to fold in unary 499 // twines. 500 Child NewLHS, NewRHS; 501 NewLHS.twine = this; 502 NewRHS.twine = &Suffix; 503 NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; 504 if (isUnary()) { 505 NewLHS = LHS; 506 NewLHSKind = getLHSKind(); 507 } 508 if (Suffix.isUnary()) { 509 NewRHS = Suffix.LHS; 510 NewRHSKind = Suffix.getLHSKind(); 511 } 512 513 return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); 514 } 515 516 inline Twine operator+(const Twine &LHS, const Twine &RHS) { 517 return LHS.concat(RHS); 518 } 519 520 /// Additional overload to guarantee simplified codegen; this is equivalent to 521 /// concat(). 522 523 inline Twine operator+(const char *LHS, const StringRef &RHS) { 524 return Twine(LHS, RHS); 525 } 526 527 /// Additional overload to guarantee simplified codegen; this is equivalent to 528 /// concat(). 529 530 inline Twine operator+(const StringRef &LHS, const char *RHS) { 531 return Twine(LHS, RHS); 532 } 533 534 inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { 535 RHS.print(OS); 536 return OS; 537 } 538 539 /// @} 540 } 541 542 #endif 543