1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_COMMAND_QUEUE_H_ 17 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_COMMAND_QUEUE_H_ 18 19 #include <cstdint> 20 #include <string> 21 #include <vector> 22 23 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" 24 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h" 25 #include "tensorflow/lite/delegates/gpu/cl/cl_event.h" 26 #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h" 27 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" 28 #include "tensorflow/lite/delegates/gpu/common/status.h" 29 #include "tensorflow/lite/delegates/gpu/common/task/profiling_info.h" 30 #include "tensorflow/lite/delegates/gpu/common/types.h" 31 32 namespace tflite { 33 namespace gpu { 34 namespace cl { 35 36 // A wrapper around opencl command queue 37 class CLCommandQueue { 38 public: CLCommandQueue()39 CLCommandQueue() {} 40 CLCommandQueue(cl_command_queue queue, bool has_ownership); 41 42 // Move only 43 CLCommandQueue(CLCommandQueue&& queue); 44 CLCommandQueue& operator=(CLCommandQueue&& queue); 45 CLCommandQueue(const CLCommandQueue&) = delete; 46 CLCommandQueue& operator=(const CLCommandQueue&) = delete; 47 48 virtual ~CLCommandQueue(); 49 queue()50 cl_command_queue queue() const { return queue_; } 51 52 virtual absl::Status Dispatch(const CLKernel& kernel, 53 const int3& work_groups_count, 54 const int3& work_group_size); 55 56 absl::Status Dispatch(const CLKernel& kernel, const int3& work_groups_count, 57 const int3& work_group_size, CLEvent* event); 58 59 absl::Status EnqueueEvent(CLEvent* event); 60 61 absl::Status EnqueueWriteImage(cl_mem memory, int3 region, const void* data); 62 absl::Status EnqueueReadImage(cl_mem memory, int3 region, void* data); 63 64 absl::Status EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes, 65 const void* data); 66 absl::Status EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, 67 void* data); 68 69 absl::Status WaitForCompletion(); 70 71 protected: 72 void Release(); 73 74 cl_command_queue queue_ = nullptr; 75 bool has_ownership_ = false; 76 }; 77 78 class ProfilingCommandQueue : public CLCommandQueue { 79 public: ProfilingCommandQueue()80 ProfilingCommandQueue() {} 81 explicit ProfilingCommandQueue(cl_command_queue queue); 82 83 // Move only 84 ProfilingCommandQueue(ProfilingCommandQueue&& queue); 85 ProfilingCommandQueue& operator=(ProfilingCommandQueue&& queue); 86 ProfilingCommandQueue(const ProfilingCommandQueue&) = delete; 87 ProfilingCommandQueue& operator=(const ProfilingCommandQueue&) = delete; 88 89 absl::Status Dispatch(const CLKernel& kernel, const int3& work_groups_count, 90 const int3& work_group_size) override; 91 92 // will write index for fastest work_group among work_group_sizes 93 absl::Status GetBestWorkGroupIndex(const CLKernel& kernel, 94 const GpuInfo& gpu_info, 95 const std::vector<int3>& work_groups_count, 96 const std::vector<int3>& work_group_sizes, 97 int* index); 98 99 // call ResetMeasurements() to start new seriese of measurements 100 void ResetMeasurements(); 101 102 double GetQueueExecutionTimeMs() const; 103 104 // Difference from GetQueueExecutionTimeMs is that this number doesn't include 105 // time between kernels(kernels launches or preparing) on GPU. Usually, this 106 // time should be 5-10% better than GetQueueExecutionTimeMs, because 5-10% 107 // spend on something else(maybe kernels launches or preparing) 108 double GetSumOfEventsTimeMs() const; 109 110 // This label will be used for all subsequent dispatches. 111 void SetEventsLabel(const std::string& name); 112 113 ProfilingInfo GetProfilingInfo() const; 114 115 private: 116 std::vector<CLEvent> events_; 117 std::string current_label_; 118 }; 119 120 absl::Status CreateCLCommandQueue(const CLDevice& device, 121 const CLContext& context, 122 CLCommandQueue* result); 123 124 absl::Status CreateProfilingCommandQueue(const CLDevice& device, 125 const CLContext& context, 126 ProfilingCommandQueue* result); 127 128 } // namespace cl 129 } // namespace gpu 130 } // namespace tflite 131 132 #endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_COMMAND_QUEUE_H_ 133