1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/framework/node_properties.h"
17 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
18 #define EIGEN_USE_GPU
19 #include "tensorflow/core/common_runtime/gpu/gpu_managed_allocator.h"
20 #endif
21 
22 #include <memory>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 
27 #include "tensorflow/core/common_runtime/device.h"
28 #include "tensorflow/core/common_runtime/device_factory.h"
29 #include "tensorflow/core/common_runtime/device_mgr.h"
30 #include "tensorflow/core/common_runtime/process_function_library_runtime.h"
31 #include "tensorflow/core/framework/allocator.h"
32 #include "tensorflow/core/framework/control_flow.h"
33 #include "tensorflow/core/framework/function.h"
34 #include "tensorflow/core/framework/function.pb.h"
35 #include "tensorflow/core/framework/node_def.pb.h"
36 #include "tensorflow/core/framework/op.h"
37 #include "tensorflow/core/framework/op_kernel.h"
38 #include "tensorflow/core/framework/resource_mgr.h"
39 #include "tensorflow/core/framework/tensor.h"
40 #include "tensorflow/core/framework/tensor_shape.h"
41 #include "tensorflow/core/framework/tensor_types.h"
42 #include "tensorflow/core/framework/type_index.h"
43 #include "tensorflow/core/framework/types.h"
44 #include "tensorflow/core/framework/types.pb.h"
45 #include "tensorflow/core/kernels/ops_testutil.h"
46 #include "tensorflow/core/platform/env.h"
47 #include "tensorflow/core/platform/status.h"
48 #include "tensorflow/core/protobuf/config.pb.h"
49 #include "tensorflow/core/public/session_options.h"
50 #include "tensorflow/core/public/version.h"
51 #include "tensorflow/core/util/tensor_slice_reader_cache.h"
52 
53 namespace tensorflow {
54 namespace test {
55 
SetOutputAttrs(OpKernelContext::Params * params,std::vector<AllocatorAttributes> * attrs)56 void SetOutputAttrs(OpKernelContext::Params* params,
57                     std::vector<AllocatorAttributes>* attrs) {
58   attrs->clear();
59   for (int index = 0; index < params->op_kernel->num_outputs(); index++) {
60     AllocatorAttributes attr;
61     const bool on_host =
62         (params->op_kernel->output_memory_types()[index] == HOST_MEMORY);
63     attr.set_on_host(on_host);
64     attrs->push_back(attr);
65   }
66   params->output_attr_array = attrs->data();
67 }
68 
69 }  // namespace test
70 
OpsTestBase()71 OpsTestBase::OpsTestBase() : device_type_(DEVICE_CPU) {
72   auto device = DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0");
73   CHECK(device) << "Could not create CPU device";
74 
75   thread_pool_ = std::make_unique<thread::ThreadPool>(
76       Env::Default(), /*name=*/"default", /*num_threads=*/1);
77 
78   device_ = device.get();
79   device_mgr_ = std::make_unique<StaticDeviceMgr>(std::move(device));
80 
81   allocator_ = device_->GetAllocator(AllocatorAttributes());
82 
83   flib_def_ = std::make_unique<FunctionLibraryDefinition>(
84       OpRegistry::Global(), FunctionDefLibrary{});
85   pflr_ = std::make_unique<ProcessFunctionLibraryRuntime>(
86       device_mgr_.get(), Env::Default(), /*config=*/nullptr,
87       TF_GRAPH_DEF_VERSION, flib_def_.get(), OptimizerOptions());
88 }
89 
~OpsTestBase()90 OpsTestBase::~OpsTestBase() {
91   for (auto& temp : tensors_) {
92     delete temp;
93   }
94   for (auto& temp : managed_outputs_) {
95     delete temp;
96   }
97   tensors_.clear();
98   managed_outputs_.clear();
99   context_.reset(nullptr);
100   params_.reset(nullptr);
101 }
102 
SetDevice(const DeviceType & device_type,std::unique_ptr<Device> device)103 void OpsTestBase::SetDevice(const DeviceType& device_type,
104                             std::unique_ptr<Device> device) {
105   CHECK(device_) << "No device provided";
106 
107   device_ = device.get();
108   device_type_ = device_type;
109 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
110   if (device_type == DEVICE_GPU) {
111     managed_allocator_.reset(new GpuManagedAllocator());
112     allocator_ = managed_allocator_.get();
113   } else {
114     managed_allocator_.reset();
115     allocator_ = device_->GetAllocator(AllocatorAttributes());
116   }
117 #else
118   CHECK_NE(device_type, DEVICE_GPU)
119       << "Requesting GPU on binary compiled without GOOGLE_CUDA or "
120          "TENSORFLOW_USE_ROCM.";
121   allocator_ = device_->GetAllocator(AllocatorAttributes());
122 #endif
123 
124   device_mgr_ = std::make_unique<StaticDeviceMgr>(std::move(device));
125   pflr_ = std::make_unique<ProcessFunctionLibraryRuntime>(
126       device_mgr_.get(), Env::Default(), /*config=*/nullptr,
127       TF_GRAPH_DEF_VERSION, flib_def_.get(), OptimizerOptions(),
128       thread_pool_.get());
129 }
130 
set_node_def(const NodeDef & node_def)131 void OpsTestBase::set_node_def(const NodeDef& node_def) {
132   node_def_.CopyFrom(node_def);
133 }
134 
node_def()135 NodeDef* OpsTestBase::node_def() { return &node_def_; }
136 
InitOp()137 Status OpsTestBase::InitOp() {
138   return InitOpWithGraphVersion(TF_GRAPH_DEF_VERSION);
139 }
140 
InitOpWithGraphVersion(int graph_def_version)141 Status OpsTestBase::InitOpWithGraphVersion(int graph_def_version) {
142   std::shared_ptr<const NodeProperties> props;
143   TF_RETURN_IF_ERROR(NodeProperties::CreateFromNodeDef(
144       node_def_, OpRegistry::Global(), &props));
145   OpKernel* kernel;
146   TF_RETURN_IF_ERROR(CreateOpKernel(
147       device_type_, device_, allocator(), /*flib=*/nullptr,
148       device_->resource_manager(), props, graph_def_version, &kernel));
149   kernel_.reset(kernel);
150   input_types_ = kernel_->input_types();
151   return OkStatus();
152 }
153 
RunOpKernel()154 Status OpsTestBase::RunOpKernel() {
155   // Make sure the old OpKernelContext is deleted before the Params
156   // it was using.
157   context_.reset(nullptr);
158 
159   // Delete the output copies from previous runs.
160   for (auto& temp : managed_outputs_) {
161     delete temp;
162   }
163   managed_outputs_.clear();
164   managed_outputs_.resize(0);
165 
166   params_.reset(new OpKernelContext::Params);
167   params_->device = device_;
168   params_->frame_iter = FrameAndIter(0, 0);
169   params_->inputs = inputs_;
170   params_->op_kernel = kernel_.get();
171   step_container_.reset(new ScopedStepContainer(0, [](const string&) {}));
172   params_->step_container = step_container_.get();
173   std::vector<AllocatorAttributes> attrs;
174   test::SetOutputAttrs(params_.get(), &attrs);
175   checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper;
176   params_->slice_reader_cache = &slice_reader_cache_wrapper;
177   params_->resource_manager = device_->resource_manager();
178   params_->function_library = pflr_->GetFLR(device_->name());
179 
180   context_.reset(new OpKernelContext(params_.get()));
181   device_->Compute(kernel_.get(), context_.get());
182   return context_->status();
183 }
184 
GetInput(int input_index) const185 const Tensor& OpsTestBase::GetInput(int input_index) const {
186   CHECK_LT(input_index, context_->num_inputs());
187   CHECK(!IsRefType(context_->input_dtype(input_index)));
188   return context_->input(input_index);
189 }
190 
mutable_input(int input_index)191 TensorValue OpsTestBase::mutable_input(int input_index) {
192   CHECK_LT(input_index, inputs_.size());
193   return inputs_[input_index];
194 }
195 
GetOutput(int output_index)196 Tensor* OpsTestBase::GetOutput(int output_index) {
197   CHECK_LT(output_index, context_->num_outputs());
198   Tensor* output = context_->mutable_output(output_index);
199 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
200   if (device_type_ == DEVICE_GPU) {
201     managed_outputs_.resize(context_->num_outputs());
202     // Copy the output tensor to managed memory if we haven't done so.
203     if (!managed_outputs_[output_index]) {
204       Tensor* managed_output =
205           new Tensor(allocator(), output->dtype(), output->shape());
206       auto src = output->tensor_data();
207       auto dst = managed_output->tensor_data();
208       context_->eigen_gpu_device().memcpyDeviceToHost(
209           const_cast<char*>(dst.data()), src.data(), src.size());
210       context_->eigen_gpu_device().synchronize();
211       managed_outputs_[output_index] = managed_output;
212     }
213     output = managed_outputs_[output_index];
214   }
215 #endif
216   return output;
217 }
218 
allocator()219 Allocator* OpsTestBase::allocator() { return allocator_; }
220 
op_kernel()221 OpKernel* OpsTestBase::op_kernel() { return kernel_.get(); }
222 
output_types() const223 const DataTypeVector& OpsTestBase::output_types() const {
224   return kernel_->output_types();
225 }
226 
AddInput(DataType dtype,const TensorShape & shape)227 Tensor* OpsTestBase::AddInput(DataType dtype, const TensorShape& shape) {
228   CHECK_GT(input_types_.size(), inputs_.size())
229       << "Adding more inputs than types; perhaps you need to call MakeOp";
230   bool is_ref = IsRefType(input_types_[inputs_.size()]);
231   Tensor* input = new Tensor(allocator(), dtype, shape);
232   tensors_.push_back(input);
233   if (is_ref) {
234     CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]), dtype);
235     inputs_.push_back({&lock_for_refs_, input});
236   } else {
237     CHECK_EQ(input_types_[inputs_.size()], dtype);
238     inputs_.push_back({nullptr, input});
239   }
240   return input;
241 }
242 
AddResourceInputInternal(const std::string & container_name,const std::string & name,const TypeIndex & type_index)243 void OpsTestBase::AddResourceInputInternal(const std::string& container_name,
244                                            const std::string& name,
245                                            const TypeIndex& type_index) {
246   ResourceHandle handle;
247   handle.set_device(device_->name());
248   handle.set_container(container_name);
249   handle.set_name(name);
250   handle.set_hash_code(type_index.hash_code());
251   handle.set_maybe_type_name(type_index.name());
252   Tensor* input = new Tensor(allocator(), DT_RESOURCE, TensorShape({}));
253   input->scalar<ResourceHandle>()() = handle;
254   tensors_.push_back(input);
255   inputs_.push_back({nullptr, input});
256 }
257 
258 }  // namespace tensorflow
259