/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_

#include <memory>
#include <utility>
#include <vector>

#include "absl/types/span.h"
#include "absl/types/variant.h"
#include "tensorflow/compiler/xla/debug_options_flags.h"
#include "tensorflow/compiler/xla/service/computation_layout.h"
#include "tensorflow/compiler/xla/service/hlo.pb.h"
#include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h"
#include "tensorflow/compiler/xla/service/service_executable_run_options.h"
#include "tensorflow/compiler/xla/service/shaped_buffer.h"
#include "tensorflow/compiler/xla/shape_tree.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/stream_executor/device_memory_allocator.h"

namespace xla {

// ExecutionOutput encapsulates the output buffers of an execution and the
// leftover buffers to be released by the caller.
class ExecutionOutput {
 public:
  ExecutionOutput(ScopedShapedBuffer result,
                  std::vector<se::OwningDeviceMemory> to_be_released,
                  std::vector<ShapeIndex> aliased_indices,
                  se::OwningDeviceMemory output_shape_table)
      : result_(std::move(result)),
        to_be_released_(std::move(to_be_released)),
        aliased_indices_(std::move(aliased_indices)),
        output_shape_table_(std::move(output_shape_table)) {}
  ExecutionOutput(ExecutionOutput&&) = default;
  ExecutionOutput& operator=(ExecutionOutput&&) = default;

  ~ExecutionOutput() {
    // If the ExecutionOutput has not been committed, and if there are aliased
    // indices, clear them off the ScopedShapedBuffer to prevent them from
    // being released.
    for (auto& index : aliased_indices_) {
      result_.set_buffer(se::OwningDeviceMemory(), index);
    }
  }

  // Should be called once it is known that the execute operation succeeded,
  // before returning the ExecutionOutput to the caller.
  ExecutionOutput& Commit() {
    aliased_indices_.clear();
    return *this;
  }

  const ScopedShapedBuffer& Result() const { return result_; }

  const se::OwningDeviceMemory& ShapeTable() const {
    return output_shape_table_;
  }

  ScopedShapedBuffer ConsumeResult() {
    aliased_indices_.clear();
    return std::move(result_);
  }

  se::OwningDeviceMemory ConsumeShapeTable() {
    return std::move(output_shape_table_);
  }

  const std::vector<se::OwningDeviceMemory>& ToBeReleased() const {
    return to_be_released_;
  }

  std::vector<se::OwningDeviceMemory> ConsumeToBeReleased() {
    return std::move(to_be_released_);
  }

 private:
  ScopedShapedBuffer result_;

  // Leftover buffers for the caller to release. Elements in this list are
  // donated input memory buffers that are not reused by XLA as outputs.
  std::vector<se::OwningDeviceMemory> to_be_released_;

  // These are the indices in result_ which have been aliased from the caller.
  // If the execution operation fails, the caller retains ownership of those
  // buffers, so we track the indices here and, unless the ExecutionOutput is
  // committed, remove them from result_ before destruction.
  std::vector<ShapeIndex> aliased_indices_;

  // A shape table is a contiguous region in memory that is used to hold the
  // runtime dimension sizes of dynamic output shapes.
  se::OwningDeviceMemory output_shape_table_;
};
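
// A minimal, non-normative sketch of the intended lifecycle; `Run` is a
// hypothetical helper returning StatusOr<ExecutionOutput>, not part of this
// header:
//
//   StatusOr<ExecutionOutput> maybe_out = Run(...);
//   if (!maybe_out.ok()) {
//     // No Commit(): ~ExecutionOutput clears the aliased indices, so the
//     // caller-owned aliased buffers are not freed along with the result.
//     return maybe_out.status();
//   }
//   // On success, consuming the result also clears the aliased indices,
//   // which has the same effect as Commit().
//   ScopedShapedBuffer result = maybe_out.ValueOrDie().ConsumeResult();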

// A given platform's compiler will produce an Executable -- this is a uniform
// interface that is used for launching compiled programs across platforms.
class Executable {
 public:
  explicit Executable(
      std::shared_ptr<HloModule> hlo_module,
      std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data,
      std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map)
      : hlo_module_(std::move(hlo_module)),
        hlo_profile_printer_data_(std::move(hlo_profile_printer_data)),
        hlo_profile_index_map_(std::move(hlo_profile_index_map)) {
    CHECK_EQ(hlo_profile_printer_data_.get() == nullptr,
             hlo_profile_index_map_.get() == nullptr);
  }
  virtual ~Executable() {}

  // Enqueues the compilation result on the provided stream, passing the given
  // arguments. This call is blocking and returns after the execution is done.
  //
  // If the hlo_execution_profile is provided as non-nullptr, profiling will be
  // enabled.
  //
  // Returns a shaped buffer containing the result of the computation.
  StatusOr<ScopedShapedBuffer> ExecuteOnStream(
      const ServiceExecutableRunOptions* run_options,
      absl::Span<const ShapedBuffer* const> arguments,
      HloExecutionProfile* hlo_execution_profile);

  // Starts the given program executing on the given stream/executor.
  //
  // `arguments` are ShapeTrees containing the input parameters. For each
  // element in a shape tree, if the element holds ownership of the memory, it
  // is considered donated and XLA will potentially reuse it as an output
  // buffer. XLA is also responsible for freeing all donated inputs.
  //
  // If an input is donated to XLA but is not reused as an output, it is
  // returned as a leftover buffer for the caller to release.
  //
  // This call should be non-blocking and may return as soon as all of the
  // operations are enqueued for launch on the stream. Note that some
  // implementations may in fact block, or may block in some circumstances
  // (e.g., when profiling); i.e., asynchronous is a "may" not a "must".
  //
  // If the hlo_execution_profile is provided as non-nullptr, profiling will be
  // enabled. Note that profiling is tricky to use correctly, as the profiling
  // objects (when they exist) must out-live the task.
  StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStream(
      const ServiceExecutableRunOptions* run_options,
      absl::Span<const ShapedBuffer* const> arguments,
      HloExecutionProfile* hlo_execution_profile);

  // Same as ExecuteAsyncOnStream(), but blocks waiting for the computation to
  // complete.
  StatusOr<ExecutionOutput> ExecuteOnStream(
      const ServiceExecutableRunOptions* run_options,
      std::vector<ShapeTree<MaybeOwningDeviceMemory>> arguments,
      HloExecutionProfile* hlo_execution_profile);

  virtual StatusOr<ExecutionOutput> ExecuteAsyncOnStream(
      const ServiceExecutableRunOptions* run_options,
      std::vector<ShapeTree<MaybeOwningDeviceMemory>> arguments,
      HloExecutionProfile* hlo_execution_profile) = 0;

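// A minimal, non-normative sketch of marking an input as donated;
// `arg_shape`, `buffer`, `device_ordinal`, and `allocator` are hypothetical:
//
//   ShapeTree<MaybeOwningDeviceMemory> arg(arg_shape);
//   // Donated: wrapping the allocation in se::OwningDeviceMemory hands
//   // ownership to XLA, which may reuse it as an output buffer.
//   *arg.mutable_element({}) = MaybeOwningDeviceMemory(
//       se::OwningDeviceMemory(buffer, device_ordinal, allocator));
//   // Borrowed alternative: a bare se::DeviceMemoryBase leaves ownership
//   // with the caller.
//   // *arg.mutable_element({}) = MaybeOwningDeviceMemory(buffer);
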
  // Same as ExecuteOnStream(), but runs this executable on multiple
  // streams. arguments[i] contains the arguments to the execution on
  // run_options[i]->stream() and the returned value is at index i of the
  // returned vector.
  virtual StatusOr<std::vector<ScopedShapedBuffer>> ExecuteOnStreams(
      absl::Span<const ServiceExecutableRunOptions> run_options,
      absl::Span<const absl::Span<const ShapedBuffer* const>> arguments);

  // Populates `hlo_execution_profile` from `stream`. This is implicit in any
  // Execute* API call that takes an hlo_execution_profile argument, but must
  // be called explicitly for other (async, for example) variants after the
  // stream has completed.
  virtual Status PopulateExecutionProfile(
      ExecutionProfile* execution_profile,
      HloExecutionProfile* hlo_execution_profile, se::Stream* stream) {
    return Status::OK();
  }

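// A minimal, non-normative sketch for the async case; `executable`, `stream`,
// and the profile objects are hypothetical, and the stream must have drained
// before the profile is populated:
//
//   TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
//   TF_RETURN_IF_ERROR(executable->PopulateExecutionProfile(
//       &execution_profile, &hlo_execution_profile, stream));
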
  // Convenience wrapper for calling Executable::ExecuteOnStream. Sets up a
  // timer for the execution, sets up HLO profiling if enabled, and fills in
  // the given ExecutionProfile if non-null.
  StatusOr<ScopedShapedBuffer> ExecuteOnStreamWrapper(
      const ServiceExecutableRunOptions* run_options,
      absl::Span<const ShapedBuffer* const> arguments);

  StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStreamWrapper(
      const ServiceExecutableRunOptions* run_options,
      absl::Span<const ShapedBuffer* const> arguments);

  StatusOr<ExecutionOutput> ExecuteAsyncOnStreamWrapper(
      const ServiceExecutableRunOptions* run_options,
      std::vector<ShapeTree<MaybeOwningDeviceMemory>> arguments);

  const HloProfilePrinterData& hlo_profile_printer_data() const {
    CHECK(hlo_profiling_enabled());
    return *hlo_profile_printer_data_;
  }

  const HloProfileIndexMap& hlo_profile_index_map() const {
    CHECK(hlo_profiling_enabled());
    return *hlo_profile_index_map_;
  }

  // Returns whether this executable was compiled with HLO profiling support
  // enabled. If not, the caller should not expect an hlo_execution_profile
  // passed to ExecuteOnStream above to be populated during execution.
  bool hlo_profiling_enabled() const {
    return hlo_profile_printer_data_ != nullptr;
  }

  HloModule& module() const { return *hlo_module_; }
  std::shared_ptr<HloModule> shared_module() const { return hlo_module_; }

  bool has_module() const { return hlo_module_ != nullptr; }

  const HloModuleConfig& module_config() const { return hlo_module_->config(); }

  // The shape (including layout) that results from this execution. This is the
  // shape of the DeviceMemoryBase result value in ExecuteOnStream above.
  const Shape& result_shape() const {
    return hlo_module_->config().entry_computation_layout().result_shape();
  }

  // Returns the size of the executable in bytes. Returns -1 if this query is
  // not supported by the executable.
  //
  // Does not include the size of used libraries (e.g. cuDNN, Eigen, etc.).
  virtual int64 SizeOfGeneratedCodeInBytes();

  // Dumping helpers.
  void set_hlo_proto(std::unique_ptr<xla::HloProto> hlo_proto) {
    hlo_proto_ = std::move(hlo_proto);
  }
  bool dumping_snapshot() const { return hlo_proto_ != nullptr; }
  HloProto const* hlo_proto() const { return hlo_proto_.get(); }

 protected:
  // HloModule this was compiled from. BufferAssignment keeps pointers to
  // HloInstructions owned by the HloModule so we need to keep the HloModule
  // around.
  const std::shared_ptr<HloModule> hlo_module_;

  // The serialized HLO proto. Non-null only if dumping snapshots is enabled.
  std::unique_ptr<HloProto const> hlo_proto_;

  // Execution count, used to generate a unique filename for each dumped
  // execution.
  int64 execution_count_ = 0;

  std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data_;
  std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map_;
};

}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_EXECUTABLE_H_