/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15
16 #include "tensorflow/core/framework/node_properties.h"
17 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
18 #define EIGEN_USE_GPU
19 #include "tensorflow/core/common_runtime/gpu/gpu_managed_allocator.h"
20 #endif
21
22 #include <memory>
23 #include <string>
24 #include <utility>
25 #include <vector>
26
27 #include "tensorflow/core/common_runtime/device.h"
28 #include "tensorflow/core/common_runtime/device_factory.h"
29 #include "tensorflow/core/common_runtime/device_mgr.h"
30 #include "tensorflow/core/common_runtime/process_function_library_runtime.h"
31 #include "tensorflow/core/framework/allocator.h"
32 #include "tensorflow/core/framework/control_flow.h"
33 #include "tensorflow/core/framework/function.h"
34 #include "tensorflow/core/framework/function.pb.h"
35 #include "tensorflow/core/framework/node_def.pb.h"
36 #include "tensorflow/core/framework/op.h"
37 #include "tensorflow/core/framework/op_kernel.h"
38 #include "tensorflow/core/framework/resource_mgr.h"
39 #include "tensorflow/core/framework/tensor.h"
40 #include "tensorflow/core/framework/tensor_shape.h"
41 #include "tensorflow/core/framework/tensor_types.h"
42 #include "tensorflow/core/framework/type_index.h"
43 #include "tensorflow/core/framework/types.h"
44 #include "tensorflow/core/framework/types.pb.h"
45 #include "tensorflow/core/kernels/ops_testutil.h"
46 #include "tensorflow/core/platform/env.h"
47 #include "tensorflow/core/platform/status.h"
48 #include "tensorflow/core/protobuf/config.pb.h"
49 #include "tensorflow/core/public/session_options.h"
50 #include "tensorflow/core/public/version.h"
51 #include "tensorflow/core/util/tensor_slice_reader_cache.h"
52
53 namespace tensorflow {
54 namespace test {
55
SetOutputAttrs(OpKernelContext::Params * params,std::vector<AllocatorAttributes> * attrs)56 void SetOutputAttrs(OpKernelContext::Params* params,
57 std::vector<AllocatorAttributes>* attrs) {
58 attrs->clear();
59 for (int index = 0; index < params->op_kernel->num_outputs(); index++) {
60 AllocatorAttributes attr;
61 const bool on_host =
62 (params->op_kernel->output_memory_types()[index] == HOST_MEMORY);
63 attr.set_on_host(on_host);
64 attrs->push_back(attr);
65 }
66 params->output_attr_array = attrs->data();
67 }
68
69 } // namespace test
70
OpsTestBase()71 OpsTestBase::OpsTestBase() : device_type_(DEVICE_CPU) {
72 auto device = DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0");
73 CHECK(device) << "Could not create CPU device";
74
75 thread_pool_ = absl::make_unique<thread::ThreadPool>(
76 Env::Default(), /*name=*/"default", /*num_threads=*/1);
77
78 device_ = device.get();
79 device_mgr_ = absl::make_unique<StaticDeviceMgr>(std::move(device));
80
81 allocator_ = device_->GetAllocator(AllocatorAttributes());
82
83 flib_def_ = absl::make_unique<FunctionLibraryDefinition>(
84 OpRegistry::Global(), FunctionDefLibrary{});
85 pflr_ = absl::make_unique<ProcessFunctionLibraryRuntime>(
86 device_mgr_.get(), Env::Default(), /*config=*/nullptr,
87 TF_GRAPH_DEF_VERSION, flib_def_.get(), OptimizerOptions());
88 }
89
~OpsTestBase()90 OpsTestBase::~OpsTestBase() {
91 for (auto& temp : tensors_) {
92 delete temp;
93 }
94 for (auto& temp : managed_outputs_) {
95 delete temp;
96 }
97 tensors_.clear();
98 managed_outputs_.clear();
99 context_.reset(nullptr);
100 params_.reset(nullptr);
101 }
102
SetDevice(const DeviceType & device_type,std::unique_ptr<Device> device)103 void OpsTestBase::SetDevice(const DeviceType& device_type,
104 std::unique_ptr<Device> device) {
105 CHECK(device_) << "No device provided";
106
107 device_ = device.get();
108 device_mgr_ = absl::make_unique<StaticDeviceMgr>(std::move(device));
109 pflr_ = absl::make_unique<ProcessFunctionLibraryRuntime>(
110 device_mgr_.get(), Env::Default(), /*config=*/nullptr,
111 TF_GRAPH_DEF_VERSION, flib_def_.get(), OptimizerOptions(),
112 thread_pool_.get());
113
114 device_type_ = device_type;
115 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
116 if (device_type == DEVICE_GPU) {
117 managed_allocator_.reset(new GpuManagedAllocator());
118 allocator_ = managed_allocator_.get();
119 } else {
120 managed_allocator_.reset();
121 allocator_ = device_->GetAllocator(AllocatorAttributes());
122 }
123 #else
124 CHECK_NE(device_type, DEVICE_GPU)
125 << "Requesting GPU on binary compiled without GOOGLE_CUDA or "
126 "TENSORFLOW_USE_ROCM.";
127 allocator_ = device_->GetAllocator(AllocatorAttributes());
128 #endif
129 }
130
set_node_def(const NodeDef & node_def)131 void OpsTestBase::set_node_def(const NodeDef& node_def) {
132 node_def_.CopyFrom(node_def);
133 }
134
// Returns a mutable pointer to the NodeDef that InitOp() will build the
// kernel from, so tests can adjust it before initialization.
NodeDef* OpsTestBase::node_def() { return &node_def_; }
136
// Creates the kernel from node_def_ using the current TF_GRAPH_DEF_VERSION;
// see InitOpWithGraphVersion() for the actual construction logic.
Status OpsTestBase::InitOp() {
  return InitOpWithGraphVersion(TF_GRAPH_DEF_VERSION);
}
140
InitOpWithGraphVersion(int graph_def_version)141 Status OpsTestBase::InitOpWithGraphVersion(int graph_def_version) {
142 std::shared_ptr<const NodeProperties> props;
143 TF_RETURN_IF_ERROR(NodeProperties::CreateFromNodeDef(
144 node_def_, OpRegistry::Global(), &props));
145 OpKernel* kernel;
146 TF_RETURN_IF_ERROR(CreateOpKernel(
147 device_type_, device_, allocator(), /*flib=*/nullptr,
148 device_->resource_manager(), props, graph_def_version, &kernel));
149 kernel_.reset(kernel);
150 input_types_ = kernel_->input_types();
151 return Status::OK();
152 }
153
// Runs the kernel created by InitOp() against the inputs added so far and
// returns the kernel's resulting status. Builds fresh
// OpKernelContext::Params and a fresh OpKernelContext on every call.
Status OpsTestBase::RunOpKernel() {
  // Make sure the old OpKernelContext is deleted before the Params
  // it was using.
  context_.reset(nullptr);

  // Delete the output copies from previous runs.
  for (auto& temp : managed_outputs_) {
    delete temp;
  }
  managed_outputs_.clear();
  managed_outputs_.resize(0);

  params_.reset(new OpKernelContext::Params);
  params_->device = device_;
  params_->frame_iter = FrameAndIter(0, 0);
  params_->inputs = &inputs_;
  params_->op_kernel = kernel_.get();
  // Step container whose cleanup callback is a deliberate no-op.
  step_container_.reset(new ScopedStepContainer(0, [](const string&) {}));
  params_->step_container = step_container_.get();
  // NOTE(review): `attrs` and `slice_reader_cache_wrapper` are stack locals
  // whose addresses are stored into params_, which outlives this function.
  // Presumably those fields are not read again after Compute() returns —
  // confirm before reusing params_ outside this call.
  std::vector<AllocatorAttributes> attrs;
  test::SetOutputAttrs(params_.get(), &attrs);
  checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper;
  params_->slice_reader_cache = &slice_reader_cache_wrapper;
  params_->resource_manager = device_->resource_manager();
  params_->function_library = pflr_->GetFLR(device_->name());

  context_.reset(new OpKernelContext(params_.get()));
  device_->Compute(kernel_.get(), context_.get());
  return context_->status();
}
184
GetInput(int input_index) const185 const Tensor& OpsTestBase::GetInput(int input_index) const {
186 CHECK_LT(input_index, context_->num_inputs());
187 CHECK(!IsRefType(context_->input_dtype(input_index)));
188 return context_->input(input_index);
189 }
190
// Returns the TensorValue (ref-mutex + tensor pointer) stored at position
// `input_index`, as registered by AddInput()/AddResourceInputInternal().
TensorValue OpsTestBase::mutable_input(int input_index) {
  CHECK_LT(input_index, inputs_.size());
  return inputs_[input_index];
}
195
// Returns the kernel's output tensor at `output_index`. On GPU builds with a
// GPU test device, the device-resident output is copied once into
// host-accessible managed memory and the copy is cached in managed_outputs_,
// so repeated calls return the same host copy.
Tensor* OpsTestBase::GetOutput(int output_index) {
  CHECK_LT(output_index, context_->num_outputs());
  Tensor* output = context_->mutable_output(output_index);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
  if (device_type_ == DEVICE_GPU) {
    managed_outputs_.resize(context_->num_outputs());
    // Copy the output tensor to managed memory if we haven't done so.
    if (!managed_outputs_[output_index]) {
      Tensor* managed_output =
          new Tensor(allocator(), output->dtype(), output->shape());
      auto src = output->tensor_data();
      auto dst = managed_output->tensor_data();
      // Synchronize so the copy has completed before the host reads it.
      context_->eigen_gpu_device().memcpyDeviceToHost(
          const_cast<char*>(dst.data()), src.data(), src.size());
      context_->eigen_gpu_device().synchronize();
      managed_outputs_[output_index] = managed_output;
    }
    output = managed_outputs_[output_index];
  }
#endif
  return output;
}
218
// Returns the allocator used for tensors created by this harness: the
// GPU-managed allocator when SetDevice() selected a GPU, otherwise the
// device's default allocator.
Allocator* OpsTestBase::allocator() { return allocator_; }
220
// Returns the output data types of the kernel built by InitOp(); requires a
// successful InitOp() call beforehand (kernel_ must be set).
const DataTypeVector& OpsTestBase::output_types() const {
  return kernel_->output_types();
}
224
AddInput(DataType dtype,const TensorShape & shape)225 Tensor* OpsTestBase::AddInput(DataType dtype, const TensorShape& shape) {
226 CHECK_GT(input_types_.size(), inputs_.size())
227 << "Adding more inputs than types; perhaps you need to call MakeOp";
228 bool is_ref = IsRefType(input_types_[inputs_.size()]);
229 Tensor* input = new Tensor(allocator(), dtype, shape);
230 tensors_.push_back(input);
231 if (is_ref) {
232 CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]), dtype);
233 inputs_.push_back({&lock_for_refs_, input});
234 } else {
235 CHECK_EQ(input_types_[inputs_.size()], dtype);
236 inputs_.push_back({nullptr, input});
237 }
238 return input;
239 }
240
AddResourceInputInternal(const std::string & container_name,const std::string & name,const TypeIndex & type_index)241 void OpsTestBase::AddResourceInputInternal(const std::string& container_name,
242 const std::string& name,
243 const TypeIndex& type_index) {
244 ResourceHandle handle;
245 handle.set_device(device_->name());
246 handle.set_container(container_name);
247 handle.set_name(name);
248 handle.set_hash_code(type_index.hash_code());
249 handle.set_maybe_type_name(type_index.name());
250 Tensor* input = new Tensor(allocator(), DT_RESOURCE, TensorShape({}));
251 input->scalar<ResourceHandle>()() = handle;
252 tensors_.push_back(input);
253 inputs_.push_back({nullptr, input});
254 }
255
256 } // namespace tensorflow
257