/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/common_runtime/scoped_allocator.h"
#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {

// Allocates one contiguous backing tensor large enough to hold all of the
// subtensors described by `shapes`, and registers it with the device's
// ScopedAllocatorMgr for the current step.
class ScopedAllocatorOp : public OpKernel {
 public:
  explicit ScopedAllocatorOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("shapes", &shapes_));
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    OP_REQUIRES_OK(context, context->GetAttr("expected_call_count",
                                             &expected_call_count_));
    device_ = context->device();
    // Precalculate the size of the backing tensor and the offsets of
    // the subtensors to be allocated from it, taking into account
    // alignment considerations.
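    // PopulateFields assigns each subtensor an aligned offset and byte count
    // within the backing buffer, so the final field's offset plus its
    // allocated bytes gives the total number of bytes required.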
    ScopedAllocatorMgr::PopulateFields(id_, shapes_, dtype_, &fields_);
    size_t num_bytes = fields_.back().offset + fields_.back().bytes_allocated;
    num_elements_ = num_bytes / DataTypeSize(dtype_);
    OP_REQUIRES(context, num_bytes % DataTypeSize(dtype_) == 0,
                errors::InvalidArgument(
                    "Number of bytes ", num_bytes,
                    " must be divisible by size of datatype ", dtype_));
  }

  void Compute(OpKernelContext* context) override {
    ScopedAllocatorMgr* sam = device_->GetScopedAllocatorMgr();
    if (!sam) {
      context->SetStatus(errors::Internal(
          "ScopedAllocatorMgr not supported on device ", device_->name()));
      return;
    }
    Tensor* backing_tensor = nullptr;
    AllocatorAttributes attr = context->output_alloc_attr(0);
    Status s =
        context->allocate_output(0, {num_elements_}, &backing_tensor, attr);
    VLOG(1) << "_ScopedAllocatorOp " << context->op_kernel().name()
            << " new backing tensor size " << backing_tensor->TotalBytes()
            << " num_elements_ " << num_elements_ << " buffer "
            << DMAHelper::buffer(backing_tensor) << " base addr "
            << DMAHelper::base(backing_tensor);
    if (s.ok()) {
      s = sam->AddScopedAllocator(*backing_tensor, context->step_id(), id_,
                                  name_, fields_, expected_call_count_);
    }
    if (!s.ok()) {
      context->SetStatus(s);
    }
  }

 private:
  std::vector<TensorShape> shapes_;
  DataType dtype_;
  int64_t num_elements_;
  std::vector<ScopedAllocator::Field> fields_;
  string name_;
  int32 id_;
  int32 expected_call_count_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_CPU),
                        ScopedAllocatorOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_GPU),
                        ScopedAllocatorOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_DEFAULT),
                        ScopedAllocatorOp);

// Outputs an alias of the backing tensor (input 0), optionally reshaped to
// `shape`, after verifying that every other input lies entirely within the
// backing buffer.
class ScopedAllocatorConcatOp : public OpKernel {
 public:
  explicit ScopedAllocatorConcatOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("reshape", &reshape_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& backing_tensor = context->input(0);
    // Check that type matches.
    OP_REQUIRES(context, backing_tensor.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_tensor.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    // Check that backing tensor is at least as large as the shape of the
    // output.
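    // ">=" rather than "==" because the backing tensor may hold more elements
    // than the declared output shape when subtensor offsets were padded for
    // alignment.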
    OP_REQUIRES(context, backing_tensor.NumElements() >= shape_.num_elements(),
                errors::InvalidArgument("Backing tensor num elements ",
                                        backing_tensor.NumElements(),
                                        " is not >= to expected ",
                                        shape_.num_elements()));
    Tensor output(dtype_);
    if (reshape_) {
      CHECK(output.CopyFrom(backing_tensor, shape_));
    } else {
      CHECK(output.CopyFrom(backing_tensor, backing_tensor.shape()));
    }
    context->set_output(0, output);
    const TensorBuffer* backing_buf = DMAHelper::buffer(&output);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
    // Check that all inputs lie entirely within the backing tensor.
    for (int i = 1; i < context->num_inputs(); ++i) {
      const TensorBuffer* input_buf = DMAHelper::buffer(&context->input(i));
      const void* input_lb = input_buf->data();
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument(
              "Lower bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument(
              "Upper bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
    }
    VLOG(1) << "_ScopedAllocatorConcatOp outputting backing tensor at "
            << backing_buf;
  }

 private:
  TensorShape shape_;
  DataType dtype_;
  string name_;
  int32 id_;
  bool reshape_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_CPU),
                        ScopedAllocatorConcatOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_GPU),
                        ScopedAllocatorConcatOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_DEFAULT),
                        ScopedAllocatorConcatOp);

// Forwards each input i (for i >= 1) to output i - 1 after verifying that it
// aliases a region of the backing tensor passed as input 0.
class ScopedAllocatorSplitOp : public OpKernel {
 public:
  explicit ScopedAllocatorSplitOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    // This stuff is just for debugging
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    Tensor backing_copy(context->input(0));
    // Check that type matches.
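    // dtype_ comes from the "T" attr and must agree with the backing tensor
    // received as input 0.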
    OP_REQUIRES(context, backing_copy.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_copy.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    const TensorBuffer* backing_buf = DMAHelper::buffer(&backing_copy);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
    for (int i = 1; i < context->num_inputs(); ++i) {
      VLOG(1) << "_ScopedAllocatorSplitOp assigning input " << i
              << " to output " << i - 1 << " buf addr "
              << DMAHelper::base(&context->input(i));
      Tensor copy(context->input(i));
      OP_REQUIRES(context, copy.dtype() == dtype_,
                  errors::InvalidArgument("Input ", i, " tensor type ",
                                          DataTypeString(copy.dtype()),
                                          " does not match expected type ",
                                          DataTypeString(dtype_)));
      context->set_output(i - 1, copy);
      const TensorBuffer* input_buf = DMAHelper::buffer(&copy);
      const void* input_lb = input_buf->data();
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument("Lower bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument("Upper bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
    }
  }

 private:
  DataType dtype_;
  string name_;
  int32 id_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_CPU),
                        ScopedAllocatorSplitOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_GPU),
                        ScopedAllocatorSplitOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_DEFAULT),
                        ScopedAllocatorSplitOp);

}  // namespace tensorflow