/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/common_runtime/scoped_allocator.h"
#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {

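// _ScopedAllocator allocates a single backing tensor sized (via
// ScopedAllocatorMgr::PopulateFields) to hold all of the subtensors described
// by the "shapes" attr, and registers it with the device's ScopedAllocatorMgr
// under the given sa_name and id.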
class ScopedAllocatorOp : public OpKernel {
 public:
  explicit ScopedAllocatorOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("shapes", &shapes_));
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    OP_REQUIRES_OK(context, context->GetAttr("expected_call_count",
                                             &expected_call_count_));
    device_ = context->device();
    // Precalculate the size of the backing tensor and the offsets of
    // the subtensors to be allocated from it, taking into account
    // alignment considerations.
    ScopedAllocatorMgr::PopulateFields(id_, shapes_, dtype_, &fields_);
    size_t num_bytes = fields_.back().offset + fields_.back().bytes_allocated;
    num_elements_ = num_bytes / DataTypeSize(dtype_);
    OP_REQUIRES(context, num_bytes % DataTypeSize(dtype_) == 0,
                errors::InvalidArgument(
                    "Number of bytes ", num_bytes,
                    " must be divisible by size of datatype ", dtype_));
  }

  void Compute(OpKernelContext* context) override {
    ScopedAllocatorMgr* sam = device_->GetScopedAllocatorMgr();
    if (!sam) {
      context->SetStatus(errors::Internal(
          "ScopedAllocatorMgr not supported on device ", device_->name()));
      return;
    }
    Tensor* backing_tensor = nullptr;
    AllocatorAttributes attr = context->output_alloc_attr(0);
    Status s =
        context->allocate_output(0, {num_elements_}, &backing_tensor, attr);
    if (s.ok()) {
      // Only dereference backing_tensor once allocation has succeeded.
      VLOG(1) << "_ScopedAllocatorOp " << context->op_kernel().name()
              << " new backing tensor size " << backing_tensor->TotalBytes()
              << " num_elements_ " << num_elements_ << " buffer "
              << DMAHelper::buffer(backing_tensor) << " base addr "
              << DMAHelper::base(backing_tensor);
      s = sam->AddScopedAllocator(*backing_tensor, context->step_id(), id_,
                                  name_, fields_, expected_call_count_);
    }
    if (!s.ok()) {
      context->SetStatus(s);
    }
  }

 private:
  std::vector<TensorShape> shapes_;
  DataType dtype_;
  int64_t num_elements_;
  std::vector<ScopedAllocator::Field> fields_;
  string name_;
  int32 id_;
  int32 expected_call_count_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_CPU),
                        ScopedAllocatorOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_GPU),
                        ScopedAllocatorOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_DEFAULT),
                        ScopedAllocatorOp);

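// _ScopedAllocatorConcat forwards the backing tensor received as input 0
// (reshaped to the "shape" attr when reshape is true) as its single output,
// after verifying that every remaining input aliases memory lying entirely
// within the backing tensor's buffer.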
class ScopedAllocatorConcatOp : public OpKernel {
 public:
  explicit ScopedAllocatorConcatOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("reshape", &reshape_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& backing_tensor = context->input(0);
    // Check that type matches.
    OP_REQUIRES(context, backing_tensor.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_tensor.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    // Check that the backing tensor is at least as large as the shape of the
    // output.
    OP_REQUIRES(context, backing_tensor.NumElements() >= shape_.num_elements(),
                errors::InvalidArgument("Backing tensor num elements ",
                                        backing_tensor.NumElements(),
                                        " is not >= to expected ",
                                        shape_.num_elements()));
    Tensor output(dtype_);
    if (reshape_) {
      CHECK(output.CopyFrom(backing_tensor, shape_));
    } else {
      CHECK(output.CopyFrom(backing_tensor, backing_tensor.shape()));
    }
    context->set_output(0, output);
    const TensorBuffer* backing_buf = DMAHelper::buffer(&output);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
    // Check that all inputs lie entirely within the backing tensor.
    for (int i = 1; i < context->num_inputs(); ++i) {
      const TensorBuffer* input_buf = DMAHelper::buffer(&context->input(i));
      const void* input_lb = input_buf->data();
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument(
              "Lower bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument(
              "Upper bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
    }
    VLOG(1) << "_ScopedAllocatorConcatOp outputting backing tensor at "
            << backing_buf;
  }

 private:
  TensorShape shape_;
  DataType dtype_;
  string name_;
  int32 id_;
  bool reshape_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_CPU),
                        ScopedAllocatorConcatOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_GPU),
                        ScopedAllocatorConcatOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_DEFAULT),
                        ScopedAllocatorConcatOp);

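// _ScopedAllocatorSplit forwards each input i >= 1 to output i - 1 after
// verifying that it aliases memory lying entirely within the backing tensor
// passed as input 0.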
class ScopedAllocatorSplitOp : public OpKernel {
 public:
  explicit ScopedAllocatorSplitOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    Tensor backing_copy(context->input(0));
    // Check that type matches.
    OP_REQUIRES(context, backing_copy.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_copy.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    const TensorBuffer* backing_buf = DMAHelper::buffer(&backing_copy);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
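    // Check that each input lies entirely within the backing tensor before
    // forwarding it to the corresponding output.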
    for (int i = 1; i < context->num_inputs(); ++i) {
      VLOG(1) << "_ScopedAllocatorSplitOp assigning input " << i
              << " to output " << i - 1 << " buf addr "
              << DMAHelper::base(&context->input(i));
      Tensor copy(context->input(i));
      OP_REQUIRES(context, copy.dtype() == dtype_,
                  errors::InvalidArgument("Input ", i, " tensor type ",
                                          DataTypeString(copy.dtype()),
                                          " does not match expected type ",
                                          DataTypeString(dtype_)));
      context->set_output(i - 1, copy);
      const TensorBuffer* input_buf = DMAHelper::buffer(&copy);
      const void* input_lb = input_buf->data();
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument("Lower bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument("Upper bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
    }
  }

 private:
  DataType dtype_;
  string name_;
  int32 id_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_CPU),
                        ScopedAllocatorSplitOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_GPU),
                        ScopedAllocatorSplitOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_DEFAULT),
                        ScopedAllocatorSplitOp);

}  // namespace tensorflow