1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #if !TENSORFLOW_USE_SYCL 17 #error This file must only be included when building TensorFlow with SYCL support 18 #endif 19 20 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_ 21 #define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_ 22 23 #include "tensorflow/core/common_runtime/local_device.h" 24 #include "tensorflow/core/common_runtime/sycl/sycl_allocator.h" 25 #include "tensorflow/core/common_runtime/sycl/sycl_device_context.h" 26 #include "tensorflow/core/public/session_options.h" 27 28 namespace tensorflow { 29 30 class GSYCLInterface { 31 std::vector<Eigen::QueueInterface*> m_queue_interface_; // owned 32 std::vector<Allocator*> m_cpu_allocator_; // not owned 33 std::vector<SYCLAllocator*> m_sycl_allocator_; // owned 34 std::vector<SYCLDeviceContext*> m_sycl_context_; // ref counted GSYCLInterface()35 GSYCLInterface() { 36 bool found_device = false; 37 auto device_list = Eigen::get_sycl_supported_devices(); 38 // Obtain list of supported devices from Eigen 39 for (const auto& device : device_list) { 40 if (device.is_gpu()) { 41 // returns first found GPU 42 AddDevice(device); 43 found_device = true; 44 } 45 } 46 47 if (!found_device) { 48 // Currently Intel GPU is not supported 49 LOG(WARNING) << "No OpenCL GPU found that is supported by " 50 << "ComputeCpp/triSYCL, trying OpenCL CPU"; 51 } 52 53 for (const auto& device : device_list) { 54 if (device.is_cpu()) { 55 // returns first found CPU 56 AddDevice(device); 57 found_device = true; 58 } 59 } 60 61 if (!found_device) { 62 LOG(WARNING) << "No OpenCL CPU found that is supported by " 63 << "ComputeCpp/triSYCL, checking for host sycl device"; 64 } 65 66 for (const auto& device : device_list) { 67 // triSYCL only supports the host device for now 68 if (device.is_host()) { 69 LOG(WARNING) << "Found SYCL host device"; 70 AddDevice(device); 71 found_device = true; 72 } 73 } 74 75 if (!found_device) { 76 // Currently Intel GPU is not supported 77 LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU" 78 << " supported by ComputeCPP/triSYCL was found"; 79 } else { 80 LOG(INFO) << "Found following OpenCL devices:"; 81 for (int i = 0; i < device_list.size(); i++) { 82 LOG(INFO) << GetShortDeviceDescription(i); 83 } 84 } 85 } 86 ~GSYCLInterface()87 ~GSYCLInterface() { 88 m_cpu_allocator_.clear(); 89 90 for (auto p : m_sycl_allocator_) { 91 p->Synchronize(); 92 p->ClearSYCLDevice(); 93 // Cannot delete the Allocator instances, as the Allocator lifetime 94 // needs to exceed any Tensor created by it. There is no way of 95 // knowing when all Tensors have been deallocated, as they are 96 // RefCounted and wait until all instances of a Tensor have been 97 // destroyed before calling Allocator.Deallocate. This could happen at 98 // program exit, which can set up a race condition between destroying 99 // Tensors and Allocators when the program is cleaning up. 100 } 101 m_sycl_allocator_.clear(); 102 103 for (auto p : m_sycl_context_) { 104 p->Unref(); 105 } 106 m_sycl_context_.clear(); 107 108 for (auto p : m_queue_interface_) { 109 p->deallocate_all(); 110 delete p; 111 } 112 m_queue_interface_.clear(); 113 } 114 AddDevice(const cl::sycl::device & d)115 void AddDevice(const cl::sycl::device& d) { 116 m_queue_interface_.push_back(new Eigen::QueueInterface(d)); 117 m_cpu_allocator_.push_back(cpu_allocator()); 118 m_sycl_allocator_.push_back(new SYCLAllocator(m_queue_interface_.back())); 119 m_sycl_context_.push_back(new SYCLDeviceContext()); 120 } 121 122 public: instance()123 static const GSYCLInterface* instance() { 124 // c++11 guarantees that this will be constructed in a thread safe way 125 static const GSYCLInterface instance; 126 return &instance; 127 } 128 129 Eigen::QueueInterface* GetQueueInterface(size_t i = 0) const { 130 if (!m_queue_interface_.empty()) { 131 return m_queue_interface_[i]; 132 } else { 133 std::cerr << "No cl::sycl::device has been added" << std::endl; 134 return nullptr; 135 } 136 } 137 138 SYCLAllocator* GetSYCLAllocator(size_t i = 0) const { 139 if (!m_sycl_allocator_.empty()) { 140 return m_sycl_allocator_[i]; 141 } else { 142 std::cerr << "No cl::sycl::device has been added" << std::endl; 143 return nullptr; 144 } 145 } 146 147 Allocator* GetCPUAllocator(size_t i = 0) const { 148 if (!m_cpu_allocator_.empty()) { 149 return m_cpu_allocator_[i]; 150 } else { 151 std::cerr << "No cl::sycl::device has been added" << std::endl; 152 return nullptr; 153 } 154 } 155 156 SYCLDeviceContext* GetSYCLContext(size_t i = 0) const { 157 if (!m_sycl_context_.empty()) { 158 return m_sycl_context_[i]; 159 } else { 160 std::cerr << "No cl::sycl::device has been added" << std::endl; 161 return nullptr; 162 } 163 } 164 165 string GetShortDeviceDescription(int device_id = 0) const { 166 Eigen::QueueInterface* queue_ptr = GetQueueInterface(device_id); 167 if (!queue_ptr) { 168 LOG(ERROR) 169 << "Device name cannot be given after Eigen QueueInterface destroyed"; 170 return ""; 171 } 172 auto device = queue_ptr->sycl_queue().get_device(); 173 auto name = device.get_info<cl::sycl::info::device::name>(); 174 auto vendor = device.get_info<cl::sycl::info::device::vendor>(); 175 auto profile = device.get_info<cl::sycl::info::device::profile>(); 176 177 std::string type; 178 if (device.is_host()) { 179 type = "Host"; 180 } else if (device.is_cpu()) { 181 type = "CPU"; 182 } else if (device.is_gpu()) { 183 type = "GPU"; 184 } else if (device.is_accelerator()) { 185 type = "Accelerator"; 186 } else { 187 type = "Unknown"; 188 } 189 190 return strings::StrCat( 191 "id: ", device_id, ", type: ", type, ", name: ", name.c_str(), 192 ", vendor: ", vendor.c_str(), ", profile: ", profile.c_str()); 193 } 194 }; 195 196 class SYCLDevice : public LocalDevice { 197 public: SYCLDevice(const SessionOptions & options,const string & name,Bytes memory_limit,const DeviceLocality & locality,const string & physical_device_desc,SYCLAllocator * sycl_allocator,Allocator * cpu_allocator,SYCLDeviceContext * ctx)198 SYCLDevice(const SessionOptions& options, const string& name, 199 Bytes memory_limit, const DeviceLocality& locality, 200 const string& physical_device_desc, SYCLAllocator* sycl_allocator, 201 Allocator* cpu_allocator, SYCLDeviceContext* ctx) 202 : LocalDevice(options, Device::BuildDeviceAttributes( 203 name, DEVICE_SYCL, memory_limit, locality, 204 physical_device_desc)), 205 cpu_allocator_(cpu_allocator), 206 sycl_allocator_(sycl_allocator), 207 device_context_(ctx) { 208 set_eigen_sycl_device(sycl_allocator->getSyclDevice()); 209 } 210 211 ~SYCLDevice() override; 212 213 void Compute(OpKernel* op_kernel, OpKernelContext* context) override; 214 Allocator* GetAllocator(AllocatorAttributes attr) override; 215 Status MakeTensorFromProto(const TensorProto& tensor_proto, 216 const AllocatorAttributes alloc_attrs, 217 Tensor* tensor) override; 218 219 Status FillContextMap(const Graph* graph, 220 DeviceContextMap* device_context_map) override; 221 222 Status Sync() override; 223 224 private: 225 Allocator* cpu_allocator_; // not owned 226 SYCLAllocator* sycl_allocator_; // not owned 227 SYCLDeviceContext* device_context_; // not owned 228 }; 229 230 } // namespace tensorflow 231 232 #endif // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_ 233