• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/xla/service/interpreter/executable_base.h"
17 
18 #include <type_traits>
19 #include <vector>
20 
21 #include "tensorflow/compiler/xla/literal.h"
22 #include "tensorflow/compiler/xla/service/hlo_computation.h"
23 #include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h"
24 #include "tensorflow/compiler/xla/service/shaped_buffer.h"
25 #include "tensorflow/compiler/xla/service/transfer_manager.h"
26 #include "tensorflow/compiler/xla/shape_tree.h"
27 #include "tensorflow/compiler/xla/shape_util.h"
28 #include "tensorflow/stream_executor/lib/statusor.h"
29 #include "tensorflow/stream_executor/platform.h"
30 #include "tensorflow/stream_executor/stream.h"
31 #include "tensorflow/stream_executor/stream_executor_pimpl.h"
32 
33 namespace xla {
34 namespace interpreter {
35 
InterpreterExecutableBase(std::unique_ptr<HloModule> hlo_module)36 InterpreterExecutableBase::InterpreterExecutableBase(
37     std::unique_ptr<HloModule> hlo_module)
38     : Executable(std::move(hlo_module), /*hlo_profile_printer_data=*/nullptr,
39                  /*hlo_profile_index_map=*/nullptr) {}
40 
// Runs the wrapped HLO module by round-tripping the device-resident arguments
// through host Literals, evaluating on the host (via the subclass-provided
// Evaluate()), and transferring the result back to the device.
//
// Args:
//   run_options: supplies the stream, device ordinal, allocator and
//     (optionally) an ExecutionProfile to fill in.
//   arguments: owning/borrowed device buffers for each entry-computation
//     parameter; donated buffers may be reused for the output via the
//     module's input/output alias config.
//   hlo_execution_profile: unused by this backend (no per-HLO profiling).
//
// Returns an ExecutionOutput holding the result buffers, or an error if the
// argument count/shapes do not match the entry computation.
StatusOr<ExecutionOutput> InterpreterExecutableBase::ExecuteAsyncOnStream(
    const ServiceExecutableRunOptions* run_options,
    std::vector<ExecutionInput> arguments,
    HloExecutionProfile* hlo_execution_profile) {
  se::Stream* stream = run_options->stream();
  se::StreamExecutor* executor = stream->parent();
  const se::Platform* platform = executor->platform();

  // Convert the ShapeTree to a ShapedBuffer. We do this so we can call
  // TransferManager methods below.
  std::vector<ShapedBuffer> argument_buffers;
  argument_buffers.reserve(arguments.size());
  // A negative ordinal means "unspecified": fall back to device 0.
  int device_ordinal = run_options->device_ordinal();
  if (device_ordinal < 0) {
    device_ordinal = 0;
  }
  for (auto& argument : arguments) {
    const ShapeTree<MaybeOwningDeviceMemory>& buffers = argument.Buffers();
    argument_buffers.push_back(ShapedBuffer(buffers.shape(),
                                            /*device_ordinal=*/device_ordinal));
    // Both trees have the same shape, so the two iterators advance in
    // lockstep; the ShapedBuffer only holds non-owning views — ownership of
    // the memory stays with `arguments`.
    auto in_it = buffers.begin();
    auto out_it = argument_buffers.back().buffers().begin();
    for (; in_it != buffers.end(); ++in_it, ++out_it) {
      out_it->second = in_it->second.AsDeviceMemoryBase();
    }
  }

  VLOG(1) << "Execute " << module().name();
  if (VLOG_IS_ON(2)) {
    for (const auto& a : argument_buffers) {
      VLOG(2) << "-- argument " << a;
    }
  }

  // Wall-clock timing for the optional ExecutionProfile below.
  uint64_t start_micros = tensorflow::Env::Default()->NowMicros();

  const HloComputation* computation = module().entry_computation();
  if (computation->num_parameters() != arguments.size()) {
    return tensorflow::errors::Internal(
        "Mismatch between argument count and graph parameter count.");
  }

  // Check that the args have the right shape.
  for (int64_t i = 0; i < computation->num_parameters(); ++i) {
    const auto& expected_shape = computation->parameter_instruction(i)->shape();
    const auto& actual_shape = argument_buffers[i].on_device_shape();
    bool shape_match = true;
    if (expected_shape.is_dynamic()) {
      // Dynamic parameters only need the argument to fit within the bounded
      // shape, not to match it exactly.
      if (!ShapeUtil::DynamicArrayShapeIsCompatible(actual_shape,
                                                    expected_shape)) {
        shape_match = false;
      }
    } else if (!Shape::Equal().MinorToMajorOnlyInLayout()(expected_shape,
                                                          actual_shape)) {
      shape_match = false;
    }
    if (!shape_match) {
      return InvalidArgument(
          "Shape mismatch on parameter %d.  Expected %s, but was %s.", i,
          ShapeUtil::HumanStringWithLayout(expected_shape),
          ShapeUtil::HumanStringWithLayout(actual_shape));
    }
  }

  TF_ASSIGN_OR_RETURN(TransferManager * transfer_manager,
                      TransferManager::GetForPlatform(platform));

  // Transform the ShapedBuffer arguments into literals which the evaluator
  // consumes.
  std::vector<Literal> arg_literals;
  const int64_t num_parameters = computation->num_parameters();
  arg_literals.reserve(num_parameters);
  for (int64_t p = 0; p < num_parameters; ++p) {
    TF_ASSIGN_OR_RETURN(Literal arg_literal,
                        transfer_manager->TransferLiteralFromDevice(
                            run_options->stream(), argument_buffers[p]));
    const auto& expected_shape = computation->parameter_instruction(p)->shape();
    if (expected_shape.is_dynamic()) {
      // Expand the input literal to expected shape.
      arg_literal = arg_literal.ToBoundedDynamic(expected_shape);
    }
    arg_literals.push_back(std::move(arg_literal));
  }

  // Evaluate() is provided by the concrete subclass and performs the actual
  // host-side interpretation of the entry computation.
  TF_ASSIGN_OR_RETURN(Literal result_literal,
                      Evaluate(run_options, *computation, arg_literals));
  // Shrink the generated dynamic shape into static shape.
  result_literal = result_literal.ToStatic();

  // Transform the result literal back into a ShapedBuffer.
  const HloInputOutputAliasConfig& alias_config =
      hlo_module_ == nullptr ? HloInputOutputAliasConfig()
                             : hlo_module_->input_output_alias_config();
  TF_ASSIGN_OR_RETURN(ExecutionOutput result,
                      AllocateOutputMemoryWithInputReuse(
                          result_literal.shape(), alias_config,
                          run_options->allocator(), &arguments, stream));

  TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDevice(
      run_options->stream(), result_literal, result.Result()));

  uint64_t end_micros = tensorflow::Env::Default()->NowMicros();

  // Report compute time if the caller asked for a profile; clamp to at least
  // 1ns so a zero-duration run still records a nonzero time.
  ExecutionProfile* profile = run_options->run_options().execution_profile();
  if (profile) {
    const double nanoseconds = (end_micros - start_micros) * 1000.0;
    profile->set_compute_time_ns(std::max(nanoseconds, 1.0));
  }
  // Flag donated argument buffers for release now that the output (which may
  // alias them) has been produced.
  MarkToBeReleasedArguments(absl::MakeSpan(arguments), result);
  return std::move(result);
}
152 
// Builds the output buffer tree for a result of `shape`, reusing donated
// input buffers where `alias_config` permits and allocating fresh memory for
// every remaining leaf.
//
// Args:
//   shape: on-device shape of the computation result.
//   alias_config: the module's input/output aliasing policy.
//   allocator: used for any leaf that cannot reuse an input buffer.
//   arguments: execution inputs; donated (owning) buffers aliased to the
//     output have their ownership released into the result.
//   stream: stream on which tuple index tables are written.
//
// Returns the populated ExecutionOutput, or InvalidArgument if a must-alias
// input was not actually donated at runtime.
StatusOr<ExecutionOutput>
InterpreterExecutableBase::AllocateOutputMemoryWithInputReuse(
    const Shape& shape, const HloInputOutputAliasConfig& alias_config,
    se::DeviceMemoryAllocator* allocator,
    std::vector<ExecutionInput>* arguments, se::Stream* stream) {
  // Validate up front that every must-alias input was donated (i.e. the
  // caller passed an owning buffer); otherwise the alias contract promised at
  // compile time cannot be honored.
  TF_RETURN_IF_ERROR(alias_config.ForEachAliasWithStatus(
      [&](const ShapeIndex& output_index,
          std::optional<HloInputOutputAliasConfig::Alias> alias) {
        if (alias && alias->must_alias()) {
          VLOG(1) << alias->ToString();
          const MaybeOwningDeviceMemory& original_input =
              (*arguments)[alias->parameter_number].Buffers().element(
                  alias->parameter_index);
          if (!original_input.HasOwnership()) {
            return InvalidArgument(
                "An input was configured to be must-alias at "
                "compile time but not donated at runtime: %s",
                alias->ToString());
          }
        }
        return OkStatus();
      }));

  se::StreamExecutor* executor = stream->parent();
  const se::Platform* platform = executor->platform();
  TF_ASSIGN_OR_RETURN(TransferManager * transfer_manager,
                      TransferManager::GetForPlatform(platform));

  // Walk every leaf buffer of the result, either aliasing a donated input or
  // allocating fresh device memory for it.
  ExecutionOutput result(shape, allocator, executor->device_ordinal());
  for (auto& pair : result.MutableResult()->buffers()) {
    const ShapeIndex& result_index = pair.first;
    se::DeviceMemoryBase& result_buffer = pair.second;
    int64_t allocation_bytes =
        transfer_manager->GetByteSizeRequirement(ShapeUtil::GetSubshape(
            result.Result().on_device_shape(), result_index));

    if (!ShapeUtil::IndexIsValid(alias_config.shape(), result_index)) {
      return InternalError("result_index is invalid: %s",
                           result_index.ToString());
    }

    std::optional<HloInputOutputAliasConfig::Alias> alias =
        alias_config.GetAliasedParameter(result_index);
    if (alias) {
      TF_RET_CHECK(alias->parameter_number < arguments->size());
      ExecutionInput& input = (*arguments)[alias->parameter_number];
      MaybeOwningDeviceMemory* device_memory =
          input.MutableBuffer(alias->parameter_index);
      // Release() succeeds only for donated (owning) inputs; the input slot
      // is downgraded to a non-owning view and the output takes the buffer.
      if (auto owning = device_memory->Release()) {
        se::DeviceMemoryBase device_memory_base = owning->Release();
        *device_memory = device_memory_base;
        result_buffer = device_memory_base;
        result.AddAliasedIndex(result_index);
      } else {
        VLOG(2) << "An input was not reused since it is not donated "
                << alias->ToString();
      }
    }

    // No aliased buffer was claimed above (or no alias exists): allocate a
    // fresh buffer in the subshape's memory space.
    if (result_buffer.is_null()) {
      const Shape& on_device_shape = result.Result().on_device_shape();
      const Shape& on_device_subshape =
          ShapeUtil::GetSubshape(on_device_shape, result_index);
      TF_ASSIGN_OR_RETURN(
          auto allocated_buffer,
          allocator->Allocate(executor->device_ordinal(), allocation_bytes,
                              /*retry_on_failure=*/true,
                              LayoutUtil::MemorySpace(on_device_subshape)));
      // Release ownership to the ExecutionOutput, which is responsible for
      // freeing the buffer.
      result_buffer = allocated_buffer.Release();
    }
    // Every non-empty leaf must have backing memory at this point.
    TF_RET_CHECK(allocation_bytes == 0 || result_buffer != nullptr);
  }

  // Populate the tuple pointer tables so the device-side representation of
  // nested tuples is consistent with the leaf buffers chosen above.
  TF_RETURN_IF_ERROR(
      transfer_manager->WriteTupleIndexTables(stream, result.Result()));
  return std::move(result);
}
230 
231 }  // namespace interpreter
232 }  // namespace xla
233