/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/node_properties.h"
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
#include "tensorflow/core/common_runtime/gpu/gpu_managed_allocator.h"
#endif

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/common_runtime/device_mgr.h"
#include "tensorflow/core/common_runtime/process_function_library_runtime.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/control_flow.h"
#include "tensorflow/core/framework/function.h"
#include "tensorflow/core/framework/function.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/type_index.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/protobuf/config.pb.h"
#include "tensorflow/core/public/session_options.h"
#include "tensorflow/core/public/version.h"
#include "tensorflow/core/util/tensor_slice_reader_cache.h"

namespace tensorflow {
namespace test {

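// Fills *attrs with one AllocatorAttributes per output of params->op_kernel,
// marking outputs the kernel declares as HOST_MEMORY as host-resident, and
// points params->output_attr_array at the vector's storage, which must
// therefore outlive params.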
void SetOutputAttrs(OpKernelContext::Params* params,
                    std::vector<AllocatorAttributes>* attrs) {
  attrs->clear();
  for (int index = 0; index < params->op_kernel->num_outputs(); index++) {
    AllocatorAttributes attr;
    const bool on_host =
        (params->op_kernel->output_memory_types()[index] == HOST_MEMORY);
    attr.set_on_host(on_host);
    attrs->push_back(attr);
  }
  params->output_attr_array = attrs->data();
}

}  // namespace test

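// Builds a single-device CPU test harness: a CPU device owned by a
// StaticDeviceMgr, a one-thread pool, an empty function library, and a
// ProcessFunctionLibraryRuntime bound to that device manager.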
OpsTestBase::OpsTestBase() : device_type_(DEVICE_CPU) {
  auto device = DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0");
  CHECK(device) << "Could not create CPU device";

  thread_pool_ = std::make_unique<thread::ThreadPool>(
      Env::Default(), /*name=*/"default", /*num_threads=*/1);

  device_ = device.get();
  device_mgr_ = std::make_unique<StaticDeviceMgr>(std::move(device));

  allocator_ = device_->GetAllocator(AllocatorAttributes());

  flib_def_ = std::make_unique<FunctionLibraryDefinition>(
      OpRegistry::Global(), FunctionDefLibrary{});
  pflr_ = std::make_unique<ProcessFunctionLibraryRuntime>(
      device_mgr_.get(), Env::Default(), /*config=*/nullptr,
      TF_GRAPH_DEF_VERSION, flib_def_.get(), OptimizerOptions());
}

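// Frees the raw tensors owned by the fixture and destroys the context before
// the params it points into.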
OpsTestBase::~OpsTestBase() {
  for (auto& temp : tensors_) {
    delete temp;
  }
  for (auto& temp : managed_outputs_) {
    delete temp;
  }
  tensors_.clear();
  managed_outputs_.clear();
  context_.reset(nullptr);
  params_.reset(nullptr);
}

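// Swaps in a caller-provided device and rebuilds the device manager and
// function library runtime around it. On GPU builds, DEVICE_GPU uses a
// GpuManagedAllocator so tensors allocated through allocator() are
// host-accessible.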
void OpsTestBase::SetDevice(const DeviceType& device_type,
                            std::unique_ptr<Device> device) {
  CHECK(device) << "No device provided";

  device_ = device.get();
  device_type_ = device_type;
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
  if (device_type == DEVICE_GPU) {
    managed_allocator_ = std::make_unique<GpuManagedAllocator>();
    allocator_ = managed_allocator_.get();
  } else {
    managed_allocator_.reset();
    allocator_ = device_->GetAllocator(AllocatorAttributes());
  }
#else
  CHECK_NE(device_type, DEVICE_GPU)
      << "Requesting GPU on binary compiled without GOOGLE_CUDA or "
         "TENSORFLOW_USE_ROCM.";
  allocator_ = device_->GetAllocator(AllocatorAttributes());
#endif

  device_mgr_ = std::make_unique<StaticDeviceMgr>(std::move(device));
  pflr_ = std::make_unique<ProcessFunctionLibraryRuntime>(
      device_mgr_.get(), Env::Default(), /*config=*/nullptr,
      TF_GRAPH_DEF_VERSION, flib_def_.get(), OptimizerOptions(),
      thread_pool_.get());
}

void OpsTestBase::set_node_def(const NodeDef& node_def) {
  node_def_.CopyFrom(node_def);
}

NodeDef* OpsTestBase::node_def() { return &node_def_; }

Status OpsTestBase::InitOp() {
  return InitOpWithGraphVersion(TF_GRAPH_DEF_VERSION);
}

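// Builds the kernel from node_def_ against the test device, pretending the
// graph was produced at graph_def_version, and records the kernel's declared
// input types for later checking in AddInput().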
Status OpsTestBase::InitOpWithGraphVersion(int graph_def_version) {
  std::shared_ptr<const NodeProperties> props;
  TF_RETURN_IF_ERROR(NodeProperties::CreateFromNodeDef(
      node_def_, OpRegistry::Global(), &props));
  OpKernel* kernel;
  TF_RETURN_IF_ERROR(CreateOpKernel(
      device_type_, device_, allocator(), /*flib=*/nullptr,
      device_->resource_manager(), props, graph_def_version, &kernel));
  kernel_.reset(kernel);
  input_types_ = kernel_->input_types();
  return OkStatus();
}

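// Executes the kernel once against the recorded inputs and returns the
// resulting status. A minimal sketch of how a test typically drives this
// (NodeDefBuilder and FakeInput come from the framework test utilities;
// "MyOp" is a hypothetical op name):
//
//   TF_ASSERT_OK(NodeDefBuilder("my_op", "MyOp")
//                    .Input(FakeInput(DT_FLOAT))
//                    .Finalize(node_def()));
//   TF_ASSERT_OK(InitOp());
//   AddInputFromArray<float>(TensorShape({2}), {1.0f, 2.0f});
//   TF_ASSERT_OK(RunOpKernel());
//   // ... then inspect *GetOutput(0).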
Status OpsTestBase::RunOpKernel() {
  // Make sure the old OpKernelContext is deleted before the Params
  // it was using.
  context_.reset(nullptr);

  // Delete the output copies from previous runs.
  for (auto& temp : managed_outputs_) {
    delete temp;
  }
  managed_outputs_.clear();

  params_ = std::make_unique<OpKernelContext::Params>();
  params_->device = device_;
  params_->frame_iter = FrameAndIter(0, 0);
  params_->inputs = inputs_;
  params_->op_kernel = kernel_.get();
  step_container_ =
      std::make_unique<ScopedStepContainer>(0, [](const string&) {});
  params_->step_container = step_container_.get();
  std::vector<AllocatorAttributes> attrs;
  test::SetOutputAttrs(params_.get(), &attrs);
  checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper;
  params_->slice_reader_cache = &slice_reader_cache_wrapper;
  params_->resource_manager = device_->resource_manager();
  params_->function_library = pflr_->GetFLR(device_->name());

  context_ = std::make_unique<OpKernelContext>(params_.get());
  device_->Compute(kernel_.get(), context_.get());
  return context_->status();
}

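// Returns the tensor passed as input input_index. Ref-typed inputs must be
// accessed through mutable_input() instead.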
const Tensor& OpsTestBase::GetInput(int input_index) const {
  CHECK_LT(input_index, context_->num_inputs());
  CHECK(!IsRefType(context_->input_dtype(input_index)));
  return context_->input(input_index);
}

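// Returns the recorded TensorValue for input_index; for ref inputs it
// carries the mutex that guards the underlying tensor.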
TensorValue OpsTestBase::mutable_input(int input_index) {
  CHECK_LT(input_index, inputs_.size());
  return inputs_[input_index];
}

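// Returns output output_index. On GPU the device tensor is copied once into
// host-visible managed memory and the cached copy is returned, so tests can
// read the values directly.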
Tensor* OpsTestBase::GetOutput(int output_index) {
  CHECK_LT(output_index, context_->num_outputs());
  Tensor* output = context_->mutable_output(output_index);
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
  if (device_type_ == DEVICE_GPU) {
    managed_outputs_.resize(context_->num_outputs());
    // Copy the output tensor to managed memory if we haven't done so.
    if (!managed_outputs_[output_index]) {
      Tensor* managed_output =
          new Tensor(allocator(), output->dtype(), output->shape());
      auto src = output->tensor_data();
      auto dst = managed_output->tensor_data();
      context_->eigen_gpu_device().memcpyDeviceToHost(
          const_cast<char*>(dst.data()), src.data(), src.size());
      context_->eigen_gpu_device().synchronize();
      managed_outputs_[output_index] = managed_output;
    }
    output = managed_outputs_[output_index];
  }
#endif
  return output;
}

Allocator* OpsTestBase::allocator() { return allocator_; }

OpKernel* OpsTestBase::op_kernel() { return kernel_.get(); }

const DataTypeVector& OpsTestBase::output_types() const {
  return kernel_->output_types();
}

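// Allocates an input tensor of the given type and shape, verifies it against
// the kernel's declared input types, and queues it for the next RunOpKernel().
// Ownership of the returned tensor stays with the fixture (tensors_).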
Tensor* OpsTestBase::AddInput(DataType dtype, const TensorShape& shape) {
  CHECK_GT(input_types_.size(), inputs_.size())
      << "Adding more inputs than types; perhaps you need to call MakeOp";
  bool is_ref = IsRefType(input_types_[inputs_.size()]);
  Tensor* input = new Tensor(allocator(), dtype, shape);
  tensors_.push_back(input);
  if (is_ref) {
    CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]), dtype);
    inputs_.push_back({&lock_for_refs_, input});
  } else {
    CHECK_EQ(input_types_[inputs_.size()], dtype);
    inputs_.push_back({nullptr, input});
  }
  return input;
}

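// Queues a scalar DT_RESOURCE input whose handle names a resource of type
// type_index living in container_name on the test device.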
void OpsTestBase::AddResourceInputInternal(const std::string& container_name,
                                           const std::string& name,
                                           const TypeIndex& type_index) {
  ResourceHandle handle;
  handle.set_device(device_->name());
  handle.set_container(container_name);
  handle.set_name(name);
  handle.set_hash_code(type_index.hash_code());
  handle.set_maybe_type_name(type_index.name());
  Tensor* input = new Tensor(allocator(), DT_RESOURCE, TensorShape({}));
  input->scalar<ResourceHandle>()() = handle;
  tensors_.push_back(input);
  inputs_.push_back({nullptr, input});
}

}  // namespace tensorflow