• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // Contains utilities for launching compiled XLA kernels for a KernelContext.
17 
18 #ifndef TENSORFLOW_COMPILER_JIT_XLA_LAUNCH_UTIL_H_
19 #define TENSORFLOW_COMPILER_JIT_XLA_LAUNCH_UTIL_H_
20 
21 #include "tensorflow/compiler/jit/xla_compilation_cache.h"
22 #include "tensorflow/compiler/jit/xla_tensor.h"
23 #include "tensorflow/compiler/tf2xla/xla_compiler.h"
24 #include "tensorflow/compiler/xla/client/local_client.h"
25 #include "tensorflow/compiler/xla/service/shaped_buffer.h"
26 #include "tensorflow/core/framework/allocation_description.pb.h"
27 #include "tensorflow/core/framework/resource_var.h"
28 #include "tensorflow/core/framework/tensor.h"
29 #include "tensorflow/core/framework/types.h"
30 #include "tensorflow/core/lib/core/status.h"
31 #include "tensorflow/core/lib/gtl/array_slice.h"
32 #include "tensorflow/core/platform/thread_annotations.h"
33 #include "tensorflow/stream_executor/device_memory_allocator.h"
34 
35 namespace tensorflow {
36 
37 // Snapshot of resource variables for a TF kernel invocation, mapping from
38 // parameter number to the variable's value at execution time. If the resource
39 // variable is not initialized, the value will not be present.
   // NOTE(review): "not present" presumably means the mapped absl::optional is
   // empty rather than the key being absent -- confirm against
   // SnapshotResourceVariables.
40 using ResourceVarsSnapshot = absl::flat_hash_map<int, absl::optional<Tensor>>;
41 
42 // Information about the state of a variable passed as input to the _XlaCompile
43 // and _XlaRun operators.  Unlocks the resource variable and decrements its
44 // refcount on destruction.
   //
   // Instances are move-only: the copy constructor and the copy assignment
   // operator are deleted below.
45 class VariableInfo {
46  public:
     // Builds the record for the DT_RESOURCE input at `index`, named `name` and
     // backed by `var`.  `var` may be null (see var()).  Defined out of line.
47   explicit VariableInfo(int index, absl::string_view name, Var* var);
     // Move constructor, defined out of line.  NOTE(review): presumably hands
     // the unlock/unref responsibility from `other` to the new object --
     // confirm in the definition.
48   VariableInfo(VariableInfo&& other);
49 
     // Move assignment, defined out of line.
50   VariableInfo& operator=(VariableInfo&& other);
51 
     // Copying is disabled.
52   VariableInfo(const VariableInfo&) = delete;
53   VariableInfo& operator=(const VariableInfo&) = delete;
54 
55   // The index of the DT_RESOURCE input to the _XlaCompile/_XlaRun operator.
56   // Note that the indices can be different between _XlaCompile and _XlaRun.
index()57   int index() const { return index_; }
58 
59   // A pointer to the resource variable.  May be null if this VariableInfo is
60   // "empty", i.e. it does not track a resource variable.
var()61   Var* var() const { return var_; }
62 
63   // Returns the variable name.  The returned view refers to the string stored
     // inside this object, so it must not be used after this VariableInfo has
     // been destroyed or its name has been replaced.
name()64   absl::string_view name() const { return name_; }
65 
66   // Returns true if the resource variable lock was successfully acquired by
67   // this thread.
lock_held()68   bool lock_held() const { return lock_held_; }
     // Records that the lock is held.  This only sets the flag; it does not
     // acquire the lock itself, and the class offers no way to clear the flag.
set_lock_held()69   void set_lock_held() { lock_held_ = true; }
70 
     // Defined out of line.  Per the class comment, unlocks the resource variable
     // and decrements its refcount.
71   ~VariableInfo();
72 
73  private:
     // Index of the DT_RESOURCE input; see index().
74   int index_;
     // Variable name, stored by value; see name().
75   std::string name_;
     // Tracked resource variable, or null for an "empty" VariableInfo.
76   Var* var_;
77 
78   // We can't use an optional<mutex_lock> here because it confuses the compiler's
79   // thread safety analysis. Instead we use a boolean flag and release the lock
80   // in the VariableInfo destructor.
81   bool lock_held_ = false;
82 };
83 
84 // Creates a list of updated resource variables.
   //
   // Returns the VariableInfo list wrapped in a StatusOr, i.e. either the list or
   // an error status.
   // NOTE(review): judging by the parameters, the variables are looked up through
   // `ctx` based on `compilation_result`, with input indices adjusted by
   // `missing_ctx_input_prefix` (the number of leading kernel inputs that are
   // missing, as described on XlaComputationLaunchContext::PopulateInputs) --
   // confirm against the definition.
85 xla::StatusOr<std::vector<VariableInfo>> GatherVariableInfo(
86     OpKernelContext* ctx,
87     const XlaCompiler::CompilationResult& compilation_result,
88     int missing_ctx_input_prefix);
89 
90 // Takes a snapshot of the values of resource variable arguments, whose indices
91 // are specified in `variable_indices` argument. We snapshot tensors that back
92 // resource variables since concurrent updates may modify the shape, and it is
93 // important that the shapes used for compilation match the true shapes of the
94 // buffers.
95 //
96 // We snapshot the entire set of resource variables as one atomic operation.
97 // This models Read->* dependencies between resource variable operations.  See
98 // jit/resource_operation_safety_analysis for details.
   //
   // The snapshot is delivered through the `result` output parameter; the return
   // value reports success or failure.
   // NOTE(review): `variable_indices` and `variable_infos` are presumably parallel
   // sequences (same length and order) -- confirm against the definition.
99 Status SnapshotResourceVariables(OpKernelContext* ctx,
100                                  absl::Span<const int> variable_indices,
101                                  absl::Span<VariableInfo const> variable_infos,
102                                  ResourceVarsSnapshot* result);
103 
104 // Acquires the mutexes for all the variables in `variables` using a
105 // deadlock-safe protocol (acquire the mutexes in increasing-address order).
106 //
107 // `variables` is allowed to contain instances that don't track a resource
108 // variable (i.e. variables[i].var() can be null for some i).
    //
    // The function is annotated with TF_EXCLUSIVE_LOCK_FUNCTION(), i.e. it is
    // declared to the thread-safety annotations as acquiring locks that are still
    // held when it returns.  Per the comments in VariableInfo, the locks are
    // released by the VariableInfo destructor.
109 Status LockVariables(absl::Span<VariableInfo> variables)
110     TF_EXCLUSIVE_LOCK_FUNCTION();
111 
112 // Returns a vector of VariableInfo instances for the resource variable inputs,
113 // given that *all* inputs are in `inputs`. The input indices for the resource
114 // variable inputs are in `variable_indices`.
    //
    // The vector is delivered through the `result` output parameter; the return
    // value reports success or failure.
    // NOTE(review): `rm` and `dev` are presumably used to look up the variables
    // referenced by the resource inputs -- confirm against the definition.
115 Status GetVariableInfosFromInputs(ResourceMgr* rm, DeviceBase* dev,
116                                   absl::Span<const Tensor* const> inputs,
117                                   absl::Span<const int> variable_indices,
118                                   std::vector<VariableInfo>* result);
119 
120 // Returns pointers to inputs stored in `ctx`.
    // The pointers are raw `const Tensor*`; NOTE(review): presumably they are
    // non-owning and only valid while `ctx` keeps the inputs alive -- confirm.
121 std::vector<const Tensor*> InputsFromContext(OpKernelContext* ctx);
122 
123 // Helper class to perform the marshalling of TensorFlow inputs and outputs to
124 // ShapedBuffers suitable for passing to an XLA computation.
125 class XlaComputationLaunchContext {
126  public:
127   // Create a new launch context. 'allocate_xla_tensors' is true if allocated
128   // output tensors and variables are always XlaTensors. If false they are
129   // assumed to be "normal" device pointers.
130   // If 'use_multiple_streams' is true, tensors may be defined and used on
131   // multiple streams and so se::Events must be defined and waited for. If
132   // 'use_multiple_streams' is true, 'allocate_xla_tensors' must also be true
133   // because we track inter-stream dependencies through events inside XlaTensor
134   // objects.
      // 'client' and 'xla_allocator' are kept as raw pointers.  NOTE(review):
      // presumably they are not owned and must outlive this object -- confirm.
135   XlaComputationLaunchContext(xla::LocalClient* client,
136                               se::DeviceMemoryAllocator* xla_allocator,
137                               int device_ordinal, bool allocate_xla_tensors,
138                               bool use_multiple_streams);
139 
140   // Builds a XlaCompiler::Argument vector from the arguments to an XlaLaunch
141   // op.
142   // Precondition: variables in `variable_args` are locked.
      // Static: does not use any state of a launch context instance.
143   static xla::StatusOr<std::vector<XlaCompiler::Argument>>
144   BuildXlaCompilerArguments(absl::Span<int const> must_be_constant_idxs,
145                             absl::Span<const Tensor* const> inputs,
146                             absl::Span<VariableInfo const> variable_args,
147                             Device* device);
148 
149   // Add all inputs within `ctx` as XLA arguments; the resulting
      // xla::ExecutionInput vector is the return value.
150   // `resource_vars` is a map from TensorFlow argument number to the tensor of
      // the corresponding resource variable.
151   //
152   // Assumes that the first `missing_ctx_input_prefix` inputs to the kernel are
153   // missing and adjusts input indices accordingly.  All elements in kernel's
154   // input_mapping must be greater than or equal to `missing_ctx_input_prefix`
155   // (in other words, no inputs actually required by the kernel can be missing).
156   xla::StatusOr<std::vector<xla::ExecutionInput>> PopulateInputs(
157       OpKernelContext* ctx,
158       const XlaCompiler::CompilationResult* compilation_result,
159       const std::map<int, const Tensor*>& resource_vars,
160       int missing_ctx_input_prefix,
161       const xla::HloInputOutputAliasConfig& input_output_alias);
162 
163   // Given the XLA output in `output`, populate all outputs of `ctx`.  Also
164   // writes out the resource variable updates.
165   //
166   // Updates to all resource variables are written in a single atomic operation.
167   // This models *->Write dependencies between resource variable operations.
168   // See jit/resource_operation_safety_analysis for details.
169   //
170   // `output` is passed by value.  Assumes that the first
171   // `missing_ctx_input_prefix` inputs to the compilation_result are missing
172   // and adjusts input indices accordingly.
173   Status PopulateOutputs(
174       OpKernelContext* ctx,
175       const XlaCompiler::CompilationResult* compilation_result,
176       xla::ScopedShapedBuffer output, int missing_ctx_input_prefix,
177       absl::Span<VariableInfo> variable_infos,
178       const xla::HloInputOutputAliasConfig& input_output_alias,
179       const std::map<int, const Tensor*>& resource_vars);
180 
181  private:
      // Constructor arguments, stored for later use.
182   xla::LocalClient* client_;
183   se::DeviceMemoryAllocator* xla_allocator_;
184   bool allocate_xla_tensors_;
185   bool use_multiple_streams_;
      // NOTE(review): declared last although `device_ordinal` is the third
      // constructor parameter.  Members are initialized in declaration order, so
      // the constructor's initializer list should follow the order used here.
186   int device_ordinal_;
187 };
188 
189 // A simple TensorBuffer implementation that allows us to create Tensors that
190 // take ownership of pre-allocated memory.
191 class XlaTensorBuffer : public TensorBuffer {
192  public:
XlaTensorBuffer(const void * ptr,size_t expected_size,size_t actual_size,Allocator * allocator)193   XlaTensorBuffer(const void* ptr, size_t expected_size, size_t actual_size,
194                   Allocator* allocator)
195       : TensorBuffer(const_cast<void*>(ptr)),
196         expected_size_(expected_size),
197         actual_size_(actual_size),
198         allocator_(allocator) {}
199 
~XlaTensorBuffer()200   ~XlaTensorBuffer() override {
201     if (data()) {
202       allocator_->DeallocateRaw(data());
203     }
204   }
205 
size()206   size_t size() const override { return expected_size_; }
207 
root_buffer()208   TensorBuffer* root_buffer() override { return this; }
209 
FillAllocationDescription(AllocationDescription * proto)210   void FillAllocationDescription(AllocationDescription* proto) const override {
211     proto->set_allocated_bytes(actual_size_);
212   }
213 
214  private:
215   size_t expected_size_;
216   size_t actual_size_;
217   Allocator* allocator_;
218 };
219 
220 }  // namespace tensorflow
221 
222 #endif  // TENSORFLOW_COMPILER_JIT_XLA_LAUNCH_UTIL_H_
223