1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/xla/client/local_client.h"
17 
18 #include <memory>
19 #include <string>
20 #include <utility>
21 
22 #include "llvm/ADT/Triple.h"
23 #include "tensorflow/compiler/xla/client/xla_computation.h"
24 #include "tensorflow/compiler/xla/service/backend.h"
25 #include "tensorflow/compiler/xla/service/dump.h"
26 #include "tensorflow/compiler/xla/service/service_executable_run_options.h"
27 #include "tensorflow/compiler/xla/service/source_map_util.h"
28 #include "tensorflow/compiler/xla/service/stream_pool.h"
29 #include "tensorflow/compiler/xla/status_macros.h"
30 
31 using xla::source_map_util::InvalidParameterArgument;
32 
33 namespace xla {
34 
35 namespace {
BorrowStreamForDevice(int device_ordinal,Backend * backend)36 StatusOr<StreamPool::Ptr> BorrowStreamForDevice(int device_ordinal,
37                                                 Backend* backend) {
38   if (device_ordinal < 0) {
39     device_ordinal = backend->default_device_ordinal();
40   }
41   return backend->BorrowStream(device_ordinal);
42 }
43 }  // namespace
44 
LocalExecutable(std::unique_ptr<Executable> executable,Backend * backend,ExecutableBuildOptions build_options)45 LocalExecutable::LocalExecutable(std::unique_ptr<Executable> executable,
46                                  Backend* backend,
47                                  ExecutableBuildOptions build_options)
48     : executable_(std::move(executable)),
49       backend_(backend),
50       build_options_(std::move(build_options)) {
51   CHECK_GE(build_options_.device_ordinal(), 0)
52       << "Must have a valid device ordinal that the executable was built for.";
53 }
54 
ValidateExecutionOptions(const ExecutableRunOptions & run_options,const Backend & backend)55 Status LocalExecutable::ValidateExecutionOptions(
56     const ExecutableRunOptions& run_options, const Backend& backend) {
57   if (run_options.stream() != nullptr) {
58     if (!run_options.stream()->ok()) {
59       return InvalidArgument("stream is uninitialized or in an error state");
60     }
61 
62     // Check stream matches service platform.
63     const se::Platform* stream_platform =
64         run_options.stream()->parent()->platform();
65     if (stream_platform != backend_->platform()) {
66       return InvalidArgument(
67           "stream is for platform %s, but service targets platform %s",
68           stream_platform->Name(), backend_->platform()->Name());
69     }
70 
71     // Cannot specify device_ordinal with a stream. The stream determines these
72     // values.
73     if (run_options.device_ordinal() != -1) {
74       return InvalidArgument(
75           "cannot set both device ordinal and stream options in "
76           "ExecutableRunOptions; the stream determines the device ordinal");
77     }
78   }
79 
80   // Verify that the device the executable was built for is equivalent
81   // to the device it will run on.
82   int run_device_ordinal = run_options.device_ordinal();
83   if (run_device_ordinal == -1) {
84     run_device_ordinal = run_options.stream() != nullptr
85                              ? run_options.stream()->parent()->device_ordinal()
86                              : backend_->default_device_ordinal();
87   }
88   TF_ASSIGN_OR_RETURN(bool devices_equivalent,
89                       backend_->devices_equivalent(
90                           run_device_ordinal, build_options_.device_ordinal()));
91   if (!devices_equivalent) {
92     TF_ASSIGN_OR_RETURN(se::StreamExecutor * run_executor,
93                         backend_->stream_executor(run_device_ordinal));
94     TF_ASSIGN_OR_RETURN(se::StreamExecutor * build_executor,
95                         backend_->stream_executor(build_device_ordinal()));
96     return InvalidArgument(
97         "executable is built for device %s of type \"%s\"; cannot run it on "
98         "device %s of type \"%s\"",
99         backend_->device_name(build_device_ordinal()),
100         build_executor->GetDeviceDescription().name(),
101         backend_->device_name(run_device_ordinal),
102         run_executor->GetDeviceDescription().name());
103   }
104 
105   if (!run_options.allocator()) {
106     return InvalidArgument("an allocator must be provided to ExecuteLocally");
107   }
108 
109   if (run_options.allocator()->platform() != backend.platform()) {
110     return InvalidArgument(
111         "allocator platform (%s) does not match service platform (%s)",
112         run_options.allocator()->platform()->Name(),
113         backend.platform()->Name());
114   }
115 
116   return OkStatus();
117 }
118 
119 StatusOr<std::pair<ServiceExecutableRunOptions, StreamPool::Ptr>>
RunHelper(const absl::Span<const Shape * const> argument_shapes,ExecutableRunOptions run_options)120 LocalExecutable::RunHelper(const absl::Span<const Shape* const> argument_shapes,
121                            ExecutableRunOptions run_options) {
122   const ComputationLayout& computation_layout =
123       executable_->module_config().entry_computation_layout();
124 
125   // Check argument number, shapes, and layouts.
126   const int argument_shapes_size = argument_shapes.size();
127   if (argument_shapes_size != computation_layout.parameter_count()) {
128     return InvalidArgument(
129         "invalid number of arguments for computation: expected %d, got %u",
130         computation_layout.parameter_count(), argument_shapes.size());
131   }
132   for (int i = 0, end = argument_shapes.size(); i < end; ++i) {
133     // TODO(b/187081154): Compare tiling info also.
134     if (!computation_layout.parameter_layout(i).MatchesLayoutInShape(
135             *argument_shapes[i], /*minor_to_major_only=*/false,
136             /*ignore_fully_empty_tiling=*/true)) {
137       return InvalidParameterArgument(
138           executable_.get(), i,
139           "Argument does not match host shape or layout of computation "
140           "parameter "
141           "%d: want %s, got %s",
142           i,
143           ShapeUtil::HumanStringWithLayout(
144               computation_layout.parameter_layout(i).shape()),
145           ShapeUtil::HumanStringWithLayout(*argument_shapes[i]));
146     }
147   }
148 
149   TF_RETURN_IF_ERROR(ValidateExecutionOptions(run_options, *backend_));
150 
151   StreamPool::Ptr stream;
152   if (run_options.stream() == nullptr) {
153     // NB!  The lifetime of `stream` needs to match the lifetime of
154     // `service_options` (otherwise we will end up using a returned stream in
155     // ExecuteOnStreamWrapper), which is why it isn't declared in the inner "if"
156     // scope.
157     TF_ASSIGN_OR_RETURN(
158         stream, BorrowStreamForDevice(run_options.device_ordinal(), backend_));
159     run_options.set_stream(stream.get());
160   }
161   if (run_options.allocator() == nullptr) {
162     run_options.set_allocator(backend_->memory_allocator());
163   }
164 
165   // For local client execution on CPU backends:
166   // *) The thread pool used for eigen CPU ops is from
167   //    ExecutableRunOptions.eigen_intra_op_thread_pool.
168   // *) The thread pool used for XLA CPU ops is from
169   //    backend_->eigen_intra_op_thread_pool().
170   ServiceExecutableRunOptions service_options(run_options,
171                                               backend_->StreamBorrower());
172   return std::make_pair(service_options, std::move(stream));
173 }
174 
Run(const absl::Span<const ShapedBuffer * const> arguments,ExecutableRunOptions run_options)175 StatusOr<ScopedShapedBuffer> LocalExecutable::Run(
176     const absl::Span<const ShapedBuffer* const> arguments,
177     ExecutableRunOptions run_options) {
178   std::vector<const Shape*> argument_shapes;
179   argument_shapes.reserve(arguments.size());
180   for (const ShapedBuffer* const arg : arguments) {
181     argument_shapes.push_back(&arg->on_device_shape());
182   }
183   return AsyncCallAndBlockHostUntilDone<xla::ScopedShapedBuffer>(
184       argument_shapes, run_options, [&](const ExecutableRunOptions& options) {
185         return RunAsync(arguments, options);
186       });
187 }
188 
Run(std::vector<ExecutionInput> arguments,ExecutableRunOptions run_options)189 StatusOr<ExecutionOutput> LocalExecutable::Run(
190     std::vector<ExecutionInput> arguments, ExecutableRunOptions run_options) {
191   std::vector<const Shape*> argument_shapes;
192   argument_shapes.reserve(arguments.size());
193   for (const ExecutionInput& arg : arguments) {
194     argument_shapes.push_back(&arg.shape());
195   }
196   return AsyncCallAndBlockHostUntilDone<ExecutionOutput>(
197       argument_shapes, run_options, [&](const ExecutableRunOptions& options) {
198         return RunAsync(argument_shapes, std::move(arguments), options);
199       });
200 }
201 
DumpArguments(const Backend * backend,const Executable * executable,const absl::Span<const ShapedBuffer * const> arguments,se::Stream * stream)202 static std::shared_ptr<HloSnapshot> DumpArguments(
203     const Backend* backend, const Executable* executable,
204     const absl::Span<const ShapedBuffer* const> arguments, se::Stream* stream) {
205   auto snapshot = std::make_shared<HloSnapshot>();
206   snapshot->set_execution_platform(backend->platform()->Name());
207   *snapshot->mutable_hlo() = *executable->hlo_proto();
208   for (const ShapedBuffer* arg : arguments) {
209     auto literal = std::make_shared<Literal>(arg->on_host_shape());
210     backend->transfer_manager()->TransferLiteralFromDevice(
211         stream, *arg, literal.get(), [snapshot, literal](Status status) {
212           if (!status.ok()) {
213             LOG(ERROR) << "TransferLiteralFromDevice for HLO snapshot inputs "
214                           "failed: "
215                        << status;
216             return;
217           }
218           *snapshot->add_arguments() = literal->ToProto();
219         });
220   }
221   return snapshot;
222 }
223 
DumpOutputsAndSaveSnapshot(const Backend * backend,const ShapedBuffer & outputs,std::shared_ptr<HloSnapshot> snapshot,se::Stream * stream)224 static void DumpOutputsAndSaveSnapshot(const Backend* backend,
225                                        const ShapedBuffer& outputs,
226                                        std::shared_ptr<HloSnapshot> snapshot,
227                                        se::Stream* stream) {
228   auto literal = std::make_shared<Literal>(outputs.on_host_shape());
229   backend->transfer_manager()->TransferLiteralFromDevice(
230       stream, outputs, literal.get(),
231       [snapshot{std::move(snapshot)}, literal](Status status) {
232         if (status.ok()) {
233           *snapshot->mutable_result() = literal->ToProto();
234         } else {
235           LOG(ERROR)
236               << "TransferLiteralFromDevice for HLO snapshot outputs failed: "
237               << status;
238         }
239         DumpHloSnapshotIfEnabled(*snapshot, GetDebugOptionsFromFlags());
240       });
241 }
242 
RunAsync(const absl::Span<const ShapedBuffer * const> arguments,ExecutableRunOptions run_options)243 StatusOr<ScopedShapedBuffer> LocalExecutable::RunAsync(
244     const absl::Span<const ShapedBuffer* const> arguments,
245     ExecutableRunOptions run_options) {
246   std::vector<const Shape*> argument_shapes;
247   argument_shapes.reserve(arguments.size());
248   for (const ShapedBuffer* const arg : arguments) {
249     argument_shapes.push_back(&arg->on_device_shape());
250   }
251   TF_ASSIGN_OR_RETURN(auto options_and_stream,
252                       RunHelper(argument_shapes, run_options));
253   se::Stream* stream = run_options.stream();
254 
255   std::shared_ptr<HloSnapshot> snapshot;
256   if (executable_->dumping_snapshot()) {
257     snapshot = DumpArguments(backend_, executable_.get(), arguments, stream);
258   }
259 
260   TF_ASSIGN_OR_RETURN(ScopedShapedBuffer outputs,
261                       executable_->ExecuteAsyncOnStreamWrapper(
262                           &options_and_stream.first, arguments));
263 
264   // Transfer the outputs and save the snapshot to disk.
265   if (snapshot) {
266     DumpOutputsAndSaveSnapshot(backend_, outputs, std::move(snapshot), stream);
267   }
268 
269   return std::move(outputs);
270 }
271 
MaybeOwningShapeTreeToShapedBuffer(const ShapeTree<MaybeOwningDeviceMemory> & tree,int device_ordinal)272 static ShapedBuffer MaybeOwningShapeTreeToShapedBuffer(
273     const ShapeTree<MaybeOwningDeviceMemory>& tree, int device_ordinal) {
274   ShapedBuffer result(tree.shape(), device_ordinal);
275   auto it = tree.begin();
276   auto out_it = result.buffers().begin();
277   for (; it != tree.end(); ++it, ++out_it) {
278     out_it->second = it->second.AsDeviceMemoryBase();
279   }
280   return result;
281 }
282 
RunAsync(absl::Span<Shape const * const> argument_host_shapes,std::vector<ExecutionInput> arguments,ExecutableRunOptions run_options)283 StatusOr<ExecutionOutput> LocalExecutable::RunAsync(
284     absl::Span<Shape const* const> argument_host_shapes,
285     std::vector<ExecutionInput> arguments, ExecutableRunOptions run_options) {
286   if (argument_host_shapes.size() != arguments.size()) {
287     return InvalidArgument(
288         "Number of argument host shapes not equal to number of arguments (%d "
289         "vs %d)",
290         argument_host_shapes.size(), arguments.size());
291   }
292   TF_ASSIGN_OR_RETURN(auto options_and_stream,
293                       RunHelper(argument_host_shapes, run_options));
294   se::Stream* stream = run_options.stream();
295 
296   std::shared_ptr<HloSnapshot> snapshot;
297   if (executable_->dumping_snapshot()) {
298     std::vector<ShapedBuffer> shaped_buffers;
299     std::vector<const ShapedBuffer*> shaped_buffer_ptrs;
300     shaped_buffers.reserve(arguments.size());
301     shaped_buffer_ptrs.reserve(arguments.size());
302     for (size_t i = 0; i < arguments.size(); ++i) {
303       shaped_buffers.push_back(MaybeOwningShapeTreeToShapedBuffer(
304           arguments[i].Buffers(), stream->parent()->device_ordinal()));
305       shaped_buffer_ptrs.push_back(&shaped_buffers.back());
306     }
307 
308     snapshot =
309         DumpArguments(backend_, executable_.get(), shaped_buffer_ptrs, stream);
310   }
311 
312   TF_ASSIGN_OR_RETURN(ExecutionOutput outputs,
313                       executable_->ExecuteAsyncOnStreamWrapper(
314                           &options_and_stream.first, std::move(arguments)));
315 
316   // Transfer the outputs and save the snapshot to disk.
317   if (snapshot) {
318     DumpOutputsAndSaveSnapshot(backend_, outputs.Result(), std::move(snapshot),
319                                stream);
320   }
321 
322   return std::move(outputs);
323 }
324 
RunAsync(std::vector<ExecutionInput> arguments,ExecutableRunOptions run_options)325 StatusOr<ExecutionOutput> LocalExecutable::RunAsync(
326     std::vector<ExecutionInput> arguments, ExecutableRunOptions run_options) {
327   std::vector<const Shape*> argument_shapes;
328   argument_shapes.reserve(arguments.size());
329   for (const ExecutionInput& arg : arguments) {
330     argument_shapes.push_back(&arg.shape());
331   }
332   return RunAsync(argument_shapes, std::move(arguments), run_options);
333 }
334 
platform() const335 se::Platform* LocalClient::platform() const {
336   return local_service_->backend().platform();
337 }
338 
device_count() const339 int LocalClient::device_count() const {
340   return local_service_->backend().device_count();
341 }
342 
device_ordinal_supported(int device_ordinal) const343 bool LocalClient::device_ordinal_supported(int device_ordinal) const {
344   return local_service_->backend().device_ordinal_supported(device_ordinal);
345 }
346 
default_device_ordinal() const347 int LocalClient::default_device_ordinal() const {
348   return local_service_->backend().default_device_ordinal();
349 }
350 
backend() const351 const Backend& LocalClient::backend() const {
352   return local_service_->backend();
353 }
354 
mutable_backend()355 Backend* LocalClient::mutable_backend() {
356   return local_service_->mutable_backend();
357 }
358 
UpdateBuildOptions(const ExecutableBuildOptions & options,int default_device_ordinal)359 static StatusOr<ExecutableBuildOptions> UpdateBuildOptions(
360     const ExecutableBuildOptions& options, int default_device_ordinal) {
361   ExecutableBuildOptions updated_options = options;
362   if (options.device_ordinal() == -1) {
363     updated_options.set_device_ordinal(default_device_ordinal);
364     VLOG(3) << "Set device ordinal to default value of: "
365             << updated_options.device_ordinal();
366   }
367   if (options.has_device_assignment()) {
368     if (options.device_assignment().replica_count() != options.num_replicas()) {
369       return InvalidArgument(
370           "Mismatched number of replicas for device "
371           "assignment and computation (%d vs %d).\n%s",
372           options.device_assignment().replica_count(), options.num_replicas(),
373           options.device_assignment().ToString());
374     }
375     if (options.device_assignment().computation_count() !=
376         options.num_partitions()) {
377       return InvalidArgument(
378           "Mismatched number of partitions for device "
379           "assignment and computation (%d vs %d).\n%s",
380           options.device_assignment().computation_count(),
381           options.num_partitions(), options.device_assignment().ToString());
382     }
383   }
384   return updated_options;
385 }
386 
Compile(const XlaComputation & computation,const absl::Span<const Shape * const> argument_layouts,const ExecutableBuildOptions & options)387 StatusOr<std::vector<std::unique_ptr<LocalExecutable>>> LocalClient::Compile(
388     const XlaComputation& computation,
389     const absl::Span<const Shape* const> argument_layouts,
390     const ExecutableBuildOptions& options) {
391   TF_ASSIGN_OR_RETURN(ExecutableBuildOptions updated_options,
392                       UpdateBuildOptions(options, default_device_ordinal()));
393   TF_ASSIGN_OR_RETURN(std::vector<std::unique_ptr<Executable>> executables,
394                       local_service_->CompileExecutables(
395                           computation, argument_layouts, updated_options));
396 
397   std::vector<std::unique_ptr<LocalExecutable>> local_executables;
398   local_executables.reserve(executables.size());
399 
400   for (auto& executable : executables) {
401     local_executables.push_back(std::make_unique<LocalExecutable>(
402         std::move(executable), local_service_->mutable_backend(),
403         updated_options));
404   }
405 
406   return std::move(local_executables);
407 }
408 
409 StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>>
CompileAheadOfTime(const XlaComputation & computation,const absl::Span<const Shape * const> argument_layouts,const ExecutableBuildOptions & options)410 LocalClient::CompileAheadOfTime(
411     const XlaComputation& computation,
412     const absl::Span<const Shape* const> argument_layouts,
413     const ExecutableBuildOptions& options) {
414   TF_ASSIGN_OR_RETURN(ExecutableBuildOptions updated_options,
415                       UpdateBuildOptions(options, default_device_ordinal()));
416   TF_ASSIGN_OR_RETURN(
417       std::vector<std::unique_ptr<AotCompilationResult>> aot_results,
418       local_service_->CompileAotResults(computation, argument_layouts,
419                                         updated_options));
420 
421   return std::move(aot_results);
422 }
423 
Load(const std::string & serialized_aot_result,const ExecutableBuildOptions & options)424 StatusOr<std::unique_ptr<LocalExecutable>> LocalClient::Load(
425     const std::string& serialized_aot_result,
426     const ExecutableBuildOptions& options) {
427   TF_ASSIGN_OR_RETURN(ExecutableBuildOptions updated_options,
428                       UpdateBuildOptions(options, default_device_ordinal()));
429   TF_ASSIGN_OR_RETURN(
430       se::StreamExecutor * executor,
431       backend().stream_executor(updated_options.device_ordinal()));
432 
433   TF_ASSIGN_OR_RETURN(Compiler * compiler,
434                       Compiler::GetForPlatform(platform()));
435   TF_ASSIGN_OR_RETURN(
436       std::unique_ptr<xla::AotCompilationResult> aot_result,
437       compiler->LoadAotCompilationResult(serialized_aot_result));
438 
439   TF_ASSIGN_OR_RETURN(std::unique_ptr<Executable> executable,
440                       aot_result->LoadExecutable(compiler, executor));
441   return std::make_unique<LocalExecutable>(std::move(executable),
442                                            local_service_->mutable_backend(),
443                                            updated_options);
444 }
445 
LiteralToShapedBuffer(const LiteralSlice & literal,int device_ordinal,se::DeviceMemoryAllocator * allocator)446 StatusOr<ScopedShapedBuffer> LocalClient::LiteralToShapedBuffer(
447     const LiteralSlice& literal, int device_ordinal,
448     se::DeviceMemoryAllocator* allocator) {
449   if (allocator == nullptr) {
450     allocator = backend().memory_allocator();
451   }
452   TF_ASSIGN_OR_RETURN(auto scoped_buffer,
453                       backend().transfer_manager()->AllocateScopedShapedBuffer(
454                           literal.shape(), allocator, device_ordinal));
455   TF_ASSIGN_OR_RETURN(auto stream,
456                       mutable_backend()->BorrowStream(device_ordinal));
457   TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
458       stream.get(), literal, scoped_buffer));
459   return std::move(scoped_buffer);
460 }
461 
ShapedBufferToLiteral(const ShapedBuffer & shaped_buffer)462 StatusOr<Literal> LocalClient::ShapedBufferToLiteral(
463     const ShapedBuffer& shaped_buffer) {
464   TF_ASSIGN_OR_RETURN(auto stream, mutable_backend()->BorrowStream(
465                                        shaped_buffer.device_ordinal()));
466   return backend().transfer_manager()->TransferLiteralFromDevice(stream.get(),
467                                                                  shaped_buffer);
468 }
469 
GlobalDataToShapedBuffer(const GlobalDataHandle & data,int replica_number)470 StatusOr<const ShapedBuffer*> LocalClient::GlobalDataToShapedBuffer(
471     const GlobalDataHandle& data, int replica_number) {
472   return local_service_->GlobalDataToShapedBuffer(data, replica_number);
473 }
474 
TransferToInfeedLocal(const LiteralSlice & literal,int device_ordinal)475 Status LocalClient::TransferToInfeedLocal(const LiteralSlice& literal,
476                                           int device_ordinal) {
477   TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
478                       backend().stream_executor(device_ordinal));
479   return backend().transfer_manager()->TransferLiteralToInfeed(executor,
480                                                                literal);
481 }
482 
TransferFromOutfeedLocal(int device_ordinal,MutableBorrowingLiteral literal)483 Status LocalClient::TransferFromOutfeedLocal(int device_ordinal,
484                                              MutableBorrowingLiteral literal) {
485   TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
486                       backend().stream_executor(device_ordinal));
487   return backend().transfer_manager()->TransferLiteralFromOutfeed(executor,
488                                                                   literal);
489 }
490 
ReplicaNumberToDeviceOrdinal(int replica_number)491 StatusOr<int> LocalClient::ReplicaNumberToDeviceOrdinal(int replica_number) {
492   return local_service_->ReplicaNumberToDeviceOrdinal(replica_number);
493 }
494 
TransferToLocalServer(const::xla::BorrowingLiteral & literal,int device_ordinal)495 StatusOr<TransferToServerResponse> LocalClient::TransferToLocalServer(
496     const ::xla::BorrowingLiteral& literal, int device_ordinal) {
497   const ::xla::Shape& shape = literal.shape();
498 
499   TF_ASSIGN_OR_RETURN(::xla::ScopedShapedBuffer shaped_buffer,
500                       backend().transfer_manager()->AllocateScopedShapedBuffer(
501                           shape, backend().memory_allocator(), device_ordinal));
502   TF_ASSIGN_OR_RETURN(auto stream,
503                       mutable_backend()->BorrowStream(device_ordinal));
504   TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
505       stream.get(), literal, shaped_buffer));
506   std::vector<::xla::ScopedShapedBuffer> replicated_buffer;
507   replicated_buffer.emplace_back(std::move(shaped_buffer));
508   ::xla::TransferToServerResponse result;
509   TF_ASSIGN_OR_RETURN(*result.mutable_data(),
510                       local_service_->RegisterReplicatedBuffers(
511                           std::move(replicated_buffer),
512                           absl::StrCat("TransferToServer literal of shape ",
513                                        ::xla::ShapeUtil::HumanString(shape))));
514 
515   return result;
516 }
517 
518 }  // namespace xla
519