1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_ 17 #define TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_ 18 19 #include <functional> 20 #include <iosfwd> 21 #include <memory> 22 #include <string> 23 #include <vector> 24 25 #include "absl/container/flat_hash_map.h" 26 #include "absl/container/flat_hash_set.h" 27 #include "absl/types/span.h" 28 #include "tensorflow/compiler/xla/service/heap_simulator.h" 29 #include "tensorflow/compiler/xla/service/hlo.pb.h" 30 #include "tensorflow/compiler/xla/service/hlo_alias_analysis.h" 31 #include "tensorflow/compiler/xla/service/hlo_computation.h" 32 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" 33 #include "tensorflow/compiler/xla/service/hlo_instruction.h" 34 #include "tensorflow/compiler/xla/service/hlo_live_range.h" 35 #include "tensorflow/compiler/xla/service/hlo_module.h" 36 #include "tensorflow/compiler/xla/service/logical_buffer.h" 37 #include "tensorflow/compiler/xla/service/memory_space_assignment.h" 38 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" 39 #include "tensorflow/compiler/xla/statusor.h" 40 #include "tensorflow/compiler/xla/types.h" 41 #include "tensorflow/core/platform/logging.h" 42 #include "tensorflow/core/platform/macros.h" 43 #include "tensorflow/core/platform/types.h" 44 45 namespace xla { 46 47 // Walk the call graph of the HLO module and place each computation into either 48 // thread_local_computations or global_computations depending upon whether the 49 // computation requires thread-local allocations or global allocations. The 50 // elements in thread_local_computations and global_computations are in post 51 // order (if computation A has an instruction which calls computation B, then A 52 // will appear after B in the vector). 53 Status GatherComputationsByAllocationType( 54 const HloModule* module, 55 std::vector<const HloComputation*>* thread_local_computations, 56 std::vector<const HloComputation*>* global_computations); 57 58 // This class abstracts an allocation of contiguous memory which can hold the 59 // values described by LogicalBuffers. Each LogicalBuffer occupies a sub-range 60 // of the allocation, represented by a Slice. A single BufferAllocation may hold 61 // LogicalBuffers with disjoint liveness, which may have overlapping Slices. A 62 // single BufferAllocation may also hold LogicalBuffers with overlapping 63 // liveness, which must have disjoint Slices. 64 // 65 // The abstraction includes information required by the backends for allocation, 66 // use, and deallocation of the buffer. This includes the LogicalBuffers which 67 // are held in this allocation through the execution of the computation. 68 class BufferAllocation { 69 public: 70 // Holds a unique identifier for each allocation. Values are assigned 71 // contiguously and can be used as array indexes. 72 using Index = int64; 73 BufferAllocation(Index index,int64 size,LogicalBuffer::Color color)74 BufferAllocation(Index index, int64 size, LogicalBuffer::Color color) 75 : index_(index), size_(size), color_(color) {} ~BufferAllocation()76 ~BufferAllocation() {} 77 78 // Returns the index of this allocation. index()79 Index index() const { return index_; } 80 81 // Whether this allocation is used in a parallel calling context such as 82 // inside of a map or reduce computation. Such allocations need to be thread 83 // local. is_thread_local()84 bool is_thread_local() const { return is_thread_local_; } set_is_thread_local(bool is_thread_local)85 void set_is_thread_local(bool is_thread_local) { 86 is_thread_local_ = is_thread_local; 87 } 88 89 // Whether this allocation can be used by more than one logical buffer. is_reusable()90 bool is_reusable() const { 91 // We do not reuse thread-local buffers for now, because they are 92 // dynamically allocated and their lifetimes are hard to compute. 93 // 94 // TODO(b/34669761): Don't reuse tuple buffers because the GPU backend 95 // assumes longer buffer liveness than indicated by the analysis. 96 return !is_thread_local() && !is_tuple(); 97 } 98 99 // Whether this allocation is readonly i.e. backed by memory we cannot write 100 // to. is_readonly()101 bool is_readonly() const { 102 // Entry parameters are generally readonly, except when they are aliased 103 // with any output. 104 return (is_entry_computation_parameter() && 105 !is_parameter_aliased_with_output_) || 106 is_constant(); 107 } 108 is_tuple()109 bool is_tuple() const { return is_tuple_; } set_is_tuple(bool is_tuple)110 void set_is_tuple(bool is_tuple) { is_tuple_ = is_tuple; } 111 112 // Whether this allocation holds a LogicalBuffer from a parameter of the entry 113 // computation. These buffers have lifetimes which may be longer than the 114 // XLA computation. is_entry_computation_parameter()115 bool is_entry_computation_parameter() const { 116 return is_entry_computation_parameter_; 117 } 118 119 // Whether this allocation holds a constant. On the CPU and GPU backends 120 // constant allocations are not allocated dynamically, instead we resolve 121 // references to these buffer allocations to a global in the readonly section 122 // of the binary. is_constant()123 bool is_constant() const { return is_constant_; } 124 125 // If this allocation holds a Buffer from a parameter of the entry 126 // computation, this methods returns the parameter number. CHECKs otherwise. parameter_number()127 int64 parameter_number() const { 128 CHECK(is_entry_computation_parameter_); 129 return parameter_number_; 130 } 131 132 // If this allocation is for a parameter of the entry computation, this 133 // function returns which subshape of the parameter the allocation is for. param_shape_index()134 const ShapeIndex& param_shape_index() const { 135 CHECK(is_entry_computation_parameter_); 136 return param_shape_index_; 137 } 138 139 // Returns whether this allocation is assigned a LogicalBuffer which may 140 // be live out of the entry computation. maybe_live_out()141 bool maybe_live_out() const { return maybe_live_out_; } 142 set_maybe_live_out(bool value)143 void set_maybe_live_out(bool value) { maybe_live_out_ = value; } 144 145 // Returns the size of the allocation. Necessarily this must be at least as 146 // large as any LogicalBuffer assigned to this allocation. size()147 int64 size() const { return size_; } 148 149 // Returns the color of the allocation. Only logical buffers with a matching 150 // color can reside in this allocation. color()151 LogicalBuffer::Color color() const { return color_; } 152 153 struct OffsetSize { 154 int64 offset = 0; 155 int64 size = 0; 156 }; 157 158 // Access to the logical buffers assigned to this allocation, and their 159 // associated logical offsets and sizes. assigned_buffers()160 const absl::flat_hash_map<const HloValue*, OffsetSize>& assigned_buffers() 161 const { 162 return assigned_buffers_; 163 } 164 165 // A Slice represents a contiguous portion of a memory allocation. It is used 166 // to identify the memory range that a LogicalBuffer corresponds to. 167 class Slice { 168 public: Slice()169 Slice() {} Slice(const BufferAllocation * allocation,int64 offset,int64 size)170 Slice(const BufferAllocation* allocation, int64 offset, int64 size) 171 : allocation_(allocation), offset_(offset), size_(size) {} 172 allocation()173 const BufferAllocation* allocation() const { return allocation_; } index()174 Index index() const { return allocation_->index(); } offset()175 int64 offset() const { return offset_; } size()176 int64 size() const { return size_; } 177 178 bool operator==(const Slice& other) const { 179 return index() == other.index() && offset_ == other.offset_ && 180 size_ == other.size_; 181 } 182 bool operator!=(const Slice& other) const { return !(*this == other); } 183 bool operator<(const Slice& other) const { 184 if (index() != other.index()) return index() < other.index(); 185 if (offset_ != other.offset_) return offset_ < other.offset_; 186 return size_ < other.size_; 187 } 188 189 // Returns true iff this slice's memory range has a non-empty intersection 190 // with the other slice's memory range. OverlapsWith(const Slice & other)191 bool OverlapsWith(const Slice& other) const { 192 const int64 end = offset_ + size_; 193 const int64 other_end = other.offset_ + other.size_; 194 return index() == other.index() && offset_ < other_end && 195 end > other.offset_; 196 } 197 198 template <typename H> AbslHashValue(H h,const Slice & s)199 friend H AbslHashValue(H h, const Slice& s) { 200 return H::combine(std::move(h), s.index(), s.offset(), s.size()); 201 } 202 203 string ToString() const; 204 205 private: 206 const BufferAllocation* allocation_ = nullptr; 207 int64 offset_ = 0; 208 int64 size_ = 0; 209 }; 210 211 // GetSlice returns the Slice of contiguous memory that holds the value 212 // described by the given 'buffer'. 213 // REQUIRES: 'buffer' must be assigned to this allocation. 214 Slice GetSlice(const HloValue& buffer) const; 215 216 string ToString() const; 217 BufferAllocationProto ToProto() const; 218 219 // Whether the buffer is a parameter to or live out of the entry computation. IsInputOrOutput()220 bool IsInputOrOutput() const { 221 return is_entry_computation_parameter() || maybe_live_out(); 222 } 223 224 // Whether the buffer is a temporary buffer allocated before 225 // Executable::ExecuteOnStream. IsPreallocatedTempBuffer()226 bool IsPreallocatedTempBuffer() const { 227 // Parameters do not need temporary buffers. 228 return !is_entry_computation_parameter() && 229 // LogicalBuffers that maybe pointed to by the output should live out 230 // of the computation. 231 !maybe_live_out() && 232 // Thread-local buffers are allocated using `alloca`s. 233 !is_thread_local() && 234 // Constant buffers are allocated as global values. 235 !is_constant(); 236 } 237 238 // Add a heap trace which was used to assign slices to logical buffers in this 239 // allocation. A single BufferAllocation may include multiple heap traces 240 // in the case of the temporary block where there is a heap trace per 241 // computation. AddHeapTrace(const HeapSimulatorTrace & heap_trace)242 void AddHeapTrace(const HeapSimulatorTrace& heap_trace) { 243 heap_traces_.push_back(heap_trace); 244 } 245 246 // Return the set of heap traces used to assign slices to logical buffers in 247 // this allocation. HeapTraces()248 const std::vector<HeapSimulatorTrace> HeapTraces() const { 249 return heap_traces_; 250 } 251 252 // Returns the LogicalBuffers which are live at the point of peak memory usage 253 // for this allocation. The point of peak memory usage is the point at which 254 // the total size of all live logical buffers is maximal. If peak memory is 255 // reached at multiple points, the set of logical buffers live at the earliest 256 // maximal point is returned. The vector is stably sorted by 257 // BufferValue::Index. PeakMemoryLogicalBuffers()258 const std::vector<const HloValue*>& PeakMemoryLogicalBuffers() const { 259 return peak_buffers_; 260 } 261 262 // Get the number of bytes lost to fragmentation. This is equal to the 263 // difference between the size of the allocation and the size of the maximal 264 // live set. fragmentation_bytes()265 int64 fragmentation_bytes() const { return fragmentation_bytes_; } 266 267 bool operator==(const BufferAllocation& other) const { 268 return index_ == other.index_; 269 } 270 bool operator!=(const BufferAllocation& other) const { 271 return !(*this == other); 272 } 273 bool operator<(const BufferAllocation& other) const { 274 return index() < other.index(); 275 } 276 set_entry_computation_parameter(int64 parameter_number,ShapeIndex param_shape_index,bool parameter_aliased_with_output)277 void set_entry_computation_parameter(int64 parameter_number, 278 ShapeIndex param_shape_index, 279 bool parameter_aliased_with_output) { 280 is_entry_computation_parameter_ = true; 281 is_parameter_aliased_with_output_ = parameter_aliased_with_output; 282 parameter_number_ = parameter_number; 283 param_shape_index_ = std::move(param_shape_index); 284 } 285 286 private: 287 // Only BufferAssigner and BufferAssignment can modify BufferAllocation. 288 friend class BufferAssigner; 289 friend class BufferAssignment; 290 291 // Adds a LogicalBuffer to the set assigned to this buffer. 292 void AddAssignment(const HloValue& buffer, int64 offset, int64 size); 293 set_constant(bool is_constant)294 void set_constant(bool is_constant) { is_constant_ = is_constant; } set_index(Index index)295 void set_index(Index index) { index_ = index; } set_size(int64 size)296 void set_size(int64 size) { size_ = size; } 297 298 // The index of the allocation in the BufferAssignment. 299 Index index_; 300 301 // Size of the allocation in bytes. 302 int64 size_; 303 304 // Whether this buffer needs to be thread-local. 305 bool is_thread_local_ = false; 306 307 // Whether this buffer holds a tuple. 308 bool is_tuple_ = false; 309 310 // Color of the allocation. 311 LogicalBuffer::Color color_; 312 313 // Whether this allocation holds an entry computation parameter. Entry 314 // computation parameters are special because they have lifetimes which may 315 // outlast the computation. 316 bool is_entry_computation_parameter_ = false; 317 318 // Whether this entry computation parameter is aliased with output. 319 bool is_parameter_aliased_with_output_ = false; 320 321 // If this allocation holds an entry computation parameter, this field 322 // indicates the index (starting from 0) of the parameter. 323 int64 parameter_number_ = 0; 324 325 // If this buffer is for an entry computation parameter, which subshape of the 326 // parameter is it for? 327 ShapeIndex param_shape_index_; 328 329 // Whether the allocation contains a LogicalBuffer which may be live-out of 330 // the entry computation. Note that this flag is conservatively computed by 331 // TuplePointsToAnalysis. That is, an allocation marked `maybe_live_out_` 332 // might not actually escape. 333 bool maybe_live_out_ = false; 334 335 // See comment on the is_constant() accessor. 336 bool is_constant_ = false; 337 338 // Mapping from the set of buffers assigned to this allocation to their 339 // logical offsets and sizes. 340 absl::flat_hash_map<const HloValue*, OffsetSize> assigned_buffers_; 341 342 int64 fragmentation_bytes_ = 0; 343 std::vector<HeapSimulatorTrace> heap_traces_; 344 345 // Set of buffers live at the point of peak memory usage for this allocation. 346 std::vector<const HloValue*> peak_buffers_; 347 }; 348 349 // Add stream operators for nicer output of CHECK/RET_CHECK failures. 350 std::ostream& operator<<(std::ostream& out, const BufferAllocation& s); 351 std::ostream& operator<<(std::ostream& out, const BufferAllocation::Slice& s); 352 353 // This class encapsulates an assignment of the LogicalBuffers in an XLA 354 // module to a set of BufferAllocations. 355 class BufferAssignment { 356 public: 357 // Returns the vector containing all buffer allocations in this assignment. Allocations()358 const std::vector<BufferAllocation>& Allocations() const { 359 return allocations_; 360 } 361 362 // This is similar to copying Allocations(), but since it's moved out, it 363 // preserves the addresses. Since BufferAllocation::Slice keeps a 364 // BufferAllocation*, and some backends keep BufferAllocation::Slice in 365 // xla::Executables, migrating off the use of addresses can be hard. ReleaseAllocations()366 std::vector<BufferAllocation> ReleaseAllocations() { 367 return std::move(allocations_); 368 } 369 370 // Returns the total size allocation holding all temporary buffers. temp_allocation_total_size()371 int64 temp_allocation_total_size() const { 372 return temp_allocation_total_size_; 373 } 374 multiheap_size_constraint_per_heap()375 uint64 multiheap_size_constraint_per_heap() const { 376 return multiheap_size_constraint_per_heap_; 377 } 378 379 // Returns whether the given buffer has been assigned an allocation. 380 bool HasAllocation(const HloValue& value) const; 381 382 bool HasAllocation(const HloBuffer& buffer) const; 383 384 // Returns the allocation that a particular LogicalBuffer has been assigned 385 // to. CHECKs if buffer has not been assigned an allocation. 386 const BufferAllocation& GetAssignedAllocation(const HloValue& value) const; 387 388 const BufferAllocation& GetAssignedAllocation( 389 const HloBuffer& hlo_buffer) const; 390 391 // Returns the allocation with the given index. CHECKs if no allocation exists 392 // with the given index. 393 const BufferAllocation& GetAllocation(BufferAllocation::Index index) const; 394 395 // Returns the allocation with the given instruction and shape index. nullptr 396 // if no allocation exists. 397 const BufferAllocation* GetInstructionAllocation( 398 const HloInstruction* hlo, const ShapeIndex& shape_index) const; 399 400 // Builds and returns a vector containing the slices which might contain the 401 // subvalue at the given index of given instruction. 402 std::set<BufferAllocation::Slice> GetAllSlices( 403 const HloInstruction* instruction, const ShapeIndex& index) const; 404 405 // Convenience function which returns whether the buffer of the 406 // instruction at the given index is assigned an allocation. 407 bool HasAllocationAt(const HloInstruction* instruction, 408 const ShapeIndex& index) const; 409 410 // Convenience function which returns whether the top-level buffer of the 411 // instruction (index == {}) is assigned an allocation. 412 bool HasTopLevelAllocation(const HloInstruction* instruction) const; 413 414 // Convenience function which returns the unique slice containing the buffer 415 // at the given index of the given instruction. If a slice is not assigned or 416 // the slice cannot be determined at compile time then an error is returned. 417 StatusOr<BufferAllocation::Slice> GetUniqueSlice( 418 const HloInstruction* instruction, const ShapeIndex& index) const; 419 // Like GetUniqueSlice but fixes the index to the top-level of the shape 420 // (index = {}). 421 StatusOr<BufferAllocation::Slice> GetUniqueTopLevelSlice( 422 const HloInstruction* instruction) const; 423 // Like GetUniqueTopLevelSlice but returns the slice for the output of the 424 // entry computation of the HLO module (ie, the result of the XLA 425 // computation). 426 StatusOr<BufferAllocation::Slice> GetUniqueTopLevelOutputSlice() const; 427 428 // Returns the set BufferValues which may be the source of the value at the 429 // given index and instruction. GetSourceBuffers(const HloInstruction * instruction,const ShapeIndex & index)430 const std::vector<const HloValue*>& GetSourceBuffers( 431 const HloInstruction* instruction, const ShapeIndex& index) const { 432 return dataflow_analysis().GetValueSet(instruction, index).values(); 433 } 434 435 // Returns true if 'hlo_a{shape_index_a}' and 'hlo_b{shape_index_b}' 436 // share the same BufferAllocation::Slice. 437 // Returns false otherwise. 438 // REQUIRES: BufferAssignment assigned allocations to both instructions. 439 bool SharesSliceAtIndex(const HloInstruction* hlo_a, 440 const ShapeIndex& shape_index_a, 441 const HloInstruction* hlo_b, 442 const ShapeIndex& shape_index_b) const; 443 444 // Returns true if the top-level buffers of hlo_a and hlo_b are the same. 445 // REQUIRES: HasTopLevelAllocation(hlo_a) && HasTopLevelAllocation(hlo_b). SharesTopLevelSlice(const HloInstruction * hlo_a,const HloInstruction * hlo_b)446 bool SharesTopLevelSlice(const HloInstruction* hlo_a, 447 const HloInstruction* hlo_b) const { 448 return SharesSliceAtIndex(hlo_a, {}, hlo_b, {}); 449 } 450 451 // Returns true if hlo_a and hlo_b both have at least one buffer assigned for 452 // their top-level and each of their nested shape indices, and if hlo_a's 453 // buffers are all different from hlo_b's buffers. 454 bool HaveDisjointSlices(const HloInstruction* hlo_a, 455 const HloInstruction* hlo_b) const; 456 dataflow_analysis()457 const HloDataflowAnalysis& dataflow_analysis() const { 458 return alias_analysis_->dataflow_analysis(); 459 } 460 alias_analysis()461 HloAliasAnalysis& alias_analysis() const { return *alias_analysis_; } 462 hlo_ordering()463 const HloOrdering& hlo_ordering() const { return *hlo_ordering_; } 464 465 // Returns the HloLiveRange object used to construct this assignment. hlo_live_range()466 const HloLiveRange& hlo_live_range() const { return *hlo_live_range_; } 467 468 string ToString() const; 469 string BufferInfoString() const; 470 BufferAssignmentProto ToProto() const; 471 472 // Statistics for the assignment. Values initialized to -1 are not always 473 // collected; fragmentation is only collected for instructions that have a 474 // sequential total ordering. 475 struct Stats { 476 int64 parameter_allocation_count = 0; 477 int64 parameter_allocation_bytes = 0; 478 int64 constant_allocation_count = 0; 479 int64 constant_allocation_bytes = 0; 480 int64 maybe_live_out_allocation_count = 0; 481 int64 maybe_live_out_allocation_bytes = 0; 482 int64 preallocated_temp_allocation_count = 0; 483 int64 preallocated_temp_allocation_bytes = 0; 484 int64 preallocated_temp_fragmentation_bytes = -1; 485 int64 total_allocation_count = 0; 486 int64 total_allocation_bytes = 0; 487 int64 total_fragmentation_bytes = -1; 488 489 string ToString() const; 490 }; GetStats()491 const Stats& GetStats() const { return stats_; } 492 493 private: 494 // Only BufferAssigner can build or modify BufferAssignments. 495 friend class BufferAssigner; 496 BufferAssignment(const HloModule * module,std::unique_ptr<HloOrdering> hlo_ordering,BufferValue::SizeFunction buffer_size,LogicalBuffer::AlignmentFunction color_alignment,std::unique_ptr<HloAliasAnalysis> alias_analysis,std::unique_ptr<HloLiveRange> hlo_live_range)497 BufferAssignment(const HloModule* module, 498 std::unique_ptr<HloOrdering> hlo_ordering, 499 BufferValue::SizeFunction buffer_size, 500 LogicalBuffer::AlignmentFunction color_alignment, 501 std::unique_ptr<HloAliasAnalysis> alias_analysis, 502 std::unique_ptr<HloLiveRange> hlo_live_range) 503 : module_(module), 504 hlo_ordering_(std::move(hlo_ordering)), 505 buffer_size_(std::move(buffer_size)), 506 color_alignment_(std::move(color_alignment)), 507 alias_analysis_(std::move(alias_analysis)), 508 hlo_live_range_(std::move(hlo_live_range)) { 509 int32 raw_value = module->config() 510 .debug_options() 511 .xla_multiheap_size_constraint_per_heap(); 512 // -1 means no constraint. 513 multiheap_size_constraint_per_heap_ = 514 (raw_value == -1) ? UINT64_MAX : raw_value; 515 } 516 517 // Creates and returns a new BufferAllocation, with no assigned 518 // LogicalBuffers. Ownership is maintained internally. 519 BufferAllocation* NewEmptyAllocation(int64 size, LogicalBuffer::Color color); 520 521 // Helper that calls NewEmptyAllocation and AddAssignment in one call, 522 // creating an allocation containing a single LogicalBuffer. 523 BufferAllocation* NewAllocation(const HloBuffer& buffer, int64 size); 524 525 // Adds a LogicalBuffer to the set assigned to the given allocation. 526 void AddAssignment(BufferAllocation* allocation, const HloBuffer& buffer, 527 int64 offset, int64 size); 528 529 void AddAssignment(BufferAllocation* allocation, const HloValue& value, 530 int64 offset, int64 size); 531 532 // Returns the HloModule used to construct this assignment. module()533 const HloModule& module() const { return *module_; } 534 535 // Mutable accessors for allocations. 536 BufferAllocation* GetMutableAssignedAllocation(const HloBuffer& buffer); 537 BufferAllocation* GetMutableAllocation(BufferAllocation::Index index); 538 HloBufferSize(const HloBuffer & buffer)539 int64 HloBufferSize(const HloBuffer& buffer) { 540 int64 result = buffer_size_(*buffer.values()[0]); 541 for (const HloValue* value : buffer.values()) { 542 DCHECK_EQ(result, buffer_size_(*value)); 543 } 544 return result; 545 } 546 547 // Combines allocations of temporary buffers into one big BufferAllocation. 548 void CombineTempAllocations(); 549 550 // Computes stats for the assignment, to be retrieved by GetStats. 551 Status ComputeSummaryStats(); 552 553 // The vector of buffer allocations. Indexed by BufferAllocation::Index. 554 std::vector<BufferAllocation> allocations_; 555 556 // The total size of all temporary buffers. 557 int64 temp_allocation_total_size_ = 0; 558 559 uint64 multiheap_size_constraint_per_heap_; 560 561 // Maps Buffers to the index of the BufferAllocation which holds the buffer. 562 absl::flat_hash_map<const HloValue*, BufferAllocation::Index> 563 allocation_index_for_value_; 564 565 const HloModule* module_; 566 567 const std::unique_ptr<HloOrdering> hlo_ordering_; 568 569 // Function which returns the buffer size for a given logical buffer (shape). 570 BufferValue::SizeFunction buffer_size_; 571 572 // Function which returns the alignment for a given logical buffer color. 573 LogicalBuffer::AlignmentFunction color_alignment_; 574 575 std::unique_ptr<HloAliasAnalysis> alias_analysis_; 576 577 std::unique_ptr<HloLiveRange> hlo_live_range_; 578 579 Stats stats_; 580 581 TF_DISALLOW_COPY_AND_ASSIGN(BufferAssignment); 582 }; 583 584 // A class which constructs a buffer assignment. 585 class BufferAssigner { 586 public: 587 using Colorer = std::function<Status(HloAliasAnalysis*, const HloOrdering&)>; 588 DefaultColorer()589 static Colorer DefaultColorer() { 590 return [](HloAliasAnalysis* alias_analysis, const HloOrdering&) { 591 for (HloValue* value : alias_analysis->dataflow_analysis().values()) { 592 const HloPosition& defining_position = value->defining_position(); 593 if (defining_position.shape().has_layout()) { 594 value->set_color(BufferValue::Color( 595 defining_position.shape().layout().memory_space())); 596 } else { 597 value->set_color(BufferValue::Color(0)); 598 } 599 } 600 return Status::OK(); 601 }; 602 } 603 604 // Returns false if a buffer cannot be assigned to given allocation. 605 606 // Build and return a BufferAssignment for the given module. The given 607 // HloOrdering is used to determine buffer liveness. buffer_size and 608 // color_alignment are functions which returns the size and alignment of a 609 // LogicalBuffer. If preset_assignments is provided, those pre-set assignment 610 // offsets will be used. The caller guarantees that those assignments are 611 // valid and they do not overwrite each other. 612 static StatusOr<std::unique_ptr<BufferAssignment>> Run( 613 const HloModule* module, std::unique_ptr<HloOrdering> hlo_ordering, 614 BufferValue::SizeFunction buffer_size, 615 LogicalBuffer::AlignmentFunction color_alignment, 616 bool allocate_buffers_for_constants = false, 617 Colorer colorer = DefaultColorer(), 618 const absl::flat_hash_set<HloOpcode>& must_not_live_out = {}, 619 HloDataflowAnalysis::CanShareBuffer can_share_buffer = nullptr, 620 std::unique_ptr<PresetAssignments> preset_assignments = {}); 621 622 private: BufferAssigner(bool allocate_buffers_for_constants,Colorer colorer,const absl::flat_hash_set<HloOpcode> & must_not_live_out,std::unique_ptr<PresetAssignments> preset_assignments)623 BufferAssigner(bool allocate_buffers_for_constants, Colorer colorer, 624 const absl::flat_hash_set<HloOpcode>& must_not_live_out, 625 std::unique_ptr<PresetAssignments> preset_assignments) 626 : allocate_buffers_for_constants_(allocate_buffers_for_constants), 627 colorer_(colorer), 628 must_not_live_out_(must_not_live_out), 629 preset_assignments_(std::move(preset_assignments)) {} 630 virtual ~BufferAssigner() = default; 631 632 // Create a buffer assignment. 633 StatusOr<std::unique_ptr<BufferAssignment>> CreateAssignment( 634 const HloModule* module, std::unique_ptr<HloOrdering> hlo_ordering, 635 BufferValue::SizeFunction buffer_size, 636 LogicalBuffer::AlignmentFunction color_alignment, 637 HloDataflowAnalysis::CanShareBuffer can_share_buffer); 638 639 // Assigns buffers to the instructions in the given computations. "assignment" 640 // is modified to reflect the new buffer assignments. If is_thread_local is 641 // true, then all assigned buffers have the is_thread_local flag set to 642 // true. 643 Status AssignBuffersForComputations( 644 const std::vector<const HloComputation*>& computations, 645 bool is_thread_local, 646 absl::flat_hash_map<const HloComputation*, 647 absl::flat_hash_set<const HloValue*>>* 648 buffers_to_assign_sequentially, 649 BufferAssignment* assignment); 650 651 // Returns true if buffer's live range interferences with buffer2's. 652 bool LiveRangeInterferes(const HloValue* buffer1, const HloValue* buffer2, 653 BufferAssignment* assignment); 654 655 // Assigns pre-set assignments, if provided. These assignments will be added 656 // to assigned_buffers and skip buffer allocation. 657 Status AssignPresetBuffers( 658 absl::flat_hash_set<const HloBuffer*>* assigned_buffers, 659 BufferAssignment* assignment); 660 661 // Assigns a single hlo buffer to an HLO allocation. 662 Status AssignSingleHloBuffer( 663 const HloBuffer* hlo_buffer, bool is_thread_local, 664 absl::flat_hash_map<const HloComputation*, 665 absl::flat_hash_set<const HloValue*>>* 666 buffers_to_assign_sequentially, 667 std::vector<BufferAllocation::Index>* allocation_indices, 668 BufferAssignment* assignment); 669 670 // Assigns 'buffers_to_assign_sequentially' using heap simulation, assuming 671 // the HLO instructions will be executed in the sequential order given by 672 // assignment->liveness().hlo_ordering().SequentialOrder. If 673 // 'run_whole_module_heap_simulation' is true, the heap simulation will be run 674 // assuming all global computations are sequentially ordered. 675 Status AssignBuffersWithSequentialOrdering( 676 const absl::flat_hash_map<const HloComputation*, 677 absl::flat_hash_set<const HloValue*>>& 678 buffers_to_assign_sequentially, 679 bool run_whole_module_heap_simulation, BufferAssignment* assignment); 680 681 // Uses the results of the heap simulator to create a single allocation, with 682 // LogicalBuffers packed to specific offsets. 683 void AssignBuffersFromHeapSimulator( 684 const HeapSimulator::Result<HloValue>& result, 685 BufferAssignment* assignment, LogicalBuffer::Color color); 686 687 // Tries to assign the given instruction to the given buffer. Returns if the 688 // assignment was successful. 689 bool MaybeAssignBuffer(BufferAllocation* allocation, const HloBuffer& buffer, 690 BufferAssignment* assignment); 691 692 // Split a set of buffers into several sets, each of which contains buffers 693 // colored with the same color. 694 absl::flat_hash_map<LogicalBuffer::Color, 695 absl::flat_hash_set<const HloValue*>> 696 SplitBuffersByColor(const absl::flat_hash_set<const HloValue*>& buffers); 697 698 // If true, allocate buffers for constant instructions. 699 bool allocate_buffers_for_constants_; 700 701 // Functor used to assign colors to newly allocated logical buffers. 702 Colorer colorer_; 703 704 // A set of hlo opcodes that can't live out of a computation. 705 absl::flat_hash_set<HloOpcode> must_not_live_out_; 706 707 // Description of any buffer offsets that are already set by an earlier pass. 708 std::unique_ptr<PresetAssignments> preset_assignments_; 709 710 TF_DISALLOW_COPY_AND_ASSIGN(BufferAssigner); 711 }; 712 713 } // namespace xla 714 715 #endif // TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_ 716