/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/common_runtime/scoped_allocator.h"
#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {

class ScopedAllocatorOp : public OpKernel {
 public:
  explicit ScopedAllocatorOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("shapes", &shapes_));
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    OP_REQUIRES_OK(context, context->GetAttr("expected_call_count",
                                             &expected_call_count_));
    device_ = context->device();
    // Precalculate the size of the backing tensor and the offsets of
    // the subtensors to be allocated from it, taking into account
    // alignment considerations.
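    // Illustrative example (hypothetical values): with dtype float and two
    // fields populated at {offset 0, bytes 32} and {offset 64, bytes 12},
    // the backing tensor must span 64 + 12 = 76 bytes, i.e.
    // num_elements_ = 76 / 4 = 19 float elements.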
    ScopedAllocatorMgr::PopulateFields(id_, shapes_, dtype_, &fields_);
    size_t num_bytes = fields_.back().offset + fields_.back().bytes;
    num_elements_ = num_bytes / DataTypeSize(dtype_);
    OP_REQUIRES(context, num_bytes % DataTypeSize(dtype_) == 0,
                errors::InvalidArgument(
                    "Number of bytes ", num_bytes,
                    " must be divisible by size of datatype ", dtype_));
  }

  void Compute(OpKernelContext* context) override {
    ScopedAllocatorMgr* sam = device_->GetScopedAllocatorMgr();
    if (!sam) {
      context->SetStatus(errors::Internal(
          "ScopedAllocatorMgr not supported on device ", device_->name()));
      return;
    }
    Tensor* backing_tensor = nullptr;
    AllocatorAttributes attr = context->output_alloc_attr(0);
    Status s =
        context->allocate_output(0, {num_elements_}, &backing_tensor, attr);
    VLOG(1) << "_ScopedAllocatorOp new backing tensor size "
            << backing_tensor->TotalBytes() << " num_elements_ "
            << num_elements_ << " buffer " << DMAHelper::buffer(backing_tensor)
            << " base addr " << DMAHelper::base(backing_tensor);
    if (s.ok()) {
      s = sam->AddScopedAllocator(*backing_tensor, context->step_id(), id_,
                                  name_, fields_, expected_call_count_);
    }
    if (!s.ok()) {
      context->SetStatus(s);
    }
  }

 private:
  std::vector<TensorShape> shapes_;
  DataType dtype_;
  int64 num_elements_;
  std::vector<ScopedAllocator::Field> fields_;
  string name_;
  int32 id_;
  int32 expected_call_count_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_CPU),
                        ScopedAllocatorOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_GPU),
                        ScopedAllocatorOp);

class ScopedAllocatorConcatOp : public OpKernel {
 public:
  explicit ScopedAllocatorConcatOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("reshape", &reshape_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& backing_tensor = context->input(0);
    // Check that type matches.
    OP_REQUIRES(context, backing_tensor.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_tensor.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    // Check that backing tensor is at least as large as the shape of the
    // output.
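    // E.g. (hypothetical values) a float backing tensor with 19 elements can
    // back an expected output shape of [2, 8] (16 <= 19) but not [4, 8]
    // (32 > 19).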
    OP_REQUIRES(context, backing_tensor.NumElements() >= shape_.num_elements(),
                errors::InvalidArgument("Backing tensor num elements ",
                                        backing_tensor.NumElements(),
                                        " is not >= expected ",
                                        shape_.num_elements()));
    Tensor output(dtype_);
    if (reshape_) {
      CHECK(output.CopyFrom(backing_tensor, shape_));
    } else {
      CHECK(output.CopyFrom(backing_tensor, backing_tensor.shape()));
    }
    context->set_output(0, output);
    const TensorBuffer* backing_buf = DMAHelper::buffer(&output);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
    // Check that all inputs lie entirely within the backing tensor.
    for (int i = 1; i < context->num_inputs(); ++i) {
      const TensorBuffer* input_buf = DMAHelper::buffer(&context->input(i));
      const void* input_lb = input_buf->data();
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument(
              "Lower bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument(
              "Upper bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
    }
    VLOG(1) << "_ScopedAllocatorConcatOp outputting backing tensor at "
            << backing_buf;
  }

 private:
  TensorShape shape_;
  DataType dtype_;
  string name_;
  int32 id_;
  bool reshape_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_CPU),
                        ScopedAllocatorConcatOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_GPU),
                        ScopedAllocatorConcatOp);

class ScopedAllocatorSplitOp : public OpKernel {
 public:
  explicit ScopedAllocatorSplitOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    Tensor backing_copy(context->input(0));
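    // Input 0 is the backing tensor; the remaining inputs alias slices of it
    // and are forwarded unchanged as outputs 0 .. num_inputs - 2 after their
    // types and buffer bounds are verified against the backing tensor.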
    // Check that type matches.
    OP_REQUIRES(context, backing_copy.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_copy.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    const TensorBuffer* backing_buf = DMAHelper::buffer(&backing_copy);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
    for (int i = 1; i < context->num_inputs(); ++i) {
      VLOG(1) << "_ScopedAllocatorSplitOp assigning input " << i
              << " to output " << i - 1 << " buf addr "
              << DMAHelper::base(&context->input(i));
      Tensor copy(context->input(i));
      OP_REQUIRES(context, copy.dtype() == dtype_,
                  errors::InvalidArgument("Input ", i, " tensor type ",
                                          DataTypeString(copy.dtype()),
                                          " does not match expected type ",
                                          DataTypeString(dtype_)));
      context->set_output(i - 1, copy);
      const TensorBuffer* input_buf = DMAHelper::buffer(&copy);
      const void* input_lb = input_buf->data();
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument("Lower bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument("Upper bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
    }
  }

 private:
  DataType dtype_;
  string name_;
  int32 id_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_CPU),
                        ScopedAllocatorSplitOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_GPU),
                        ScopedAllocatorSplitOp);

}  // namespace tensorflow