/*
 * Copyright (c) 2016-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLHELPERS_H
#define ARM_COMPUTE_CLHELPERS_H

#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/CL/OpenCL.h"

#include <set>
#include <string>

namespace arm_compute
{
class CLCoreRuntimeContext;
class CLCompileContext;
class CLBuildOptions;

enum class DataType;

/** Max vector width of an OpenCL vector */
static constexpr unsigned int max_cl_vector_width = 16;

/** Translates a tensor data type to the appropriate OpenCL type.
 *
 * @param[in] dt @ref DataType to be translated to OpenCL type.
 *
 * @return The string specifying the OpenCL type to be used.
 */
std::string get_cl_type_from_data_type(const DataType &dt);

/** Translates a tensor data type to the appropriate OpenCL promoted type.
 *
 * @param[in] dt @ref DataType to be used to get the promoted OpenCL type.
 *
 * @return The string specifying the OpenCL type to be used.
 */
std::string get_cl_promoted_type_from_data_type(const DataType &dt);

/** Translates the element size to an unsigned integer data type
 *
 * @param[in] element_size Size in bytes of an element.
 *
 * @return The string specifying the OpenCL type to be used.
 */
std::string get_cl_unsigned_type_from_element_size(size_t element_size);

/** Translates the element size to a signed integer data type
 *
 * @param[in] element_size Size in bytes of an element.
 *
 * @return The string specifying the OpenCL type to be used.
 */
std::string get_cl_signed_type_from_element_size(size_t element_size);

/** Translates a tensor data type to the appropriate OpenCL select type.
 *
 * @param[in] dt @ref DataType to be translated to OpenCL select type.
 *
 * @return The string specifying the OpenCL select type to be used.
 */
std::string get_cl_select_type_from_data_type(const DataType &dt);

/** Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
 *
 * @param[in] dt @ref DataType to be translated to OpenCL dot8 accumulator type.
 *
 * @return The string specifying the OpenCL dot8 accumulator type to be used.
 */
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt);
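
/* Illustrative sketch (not part of this header's API): the type-translation helpers
 * above are typically used to assemble preprocessor defines for an OpenCL kernel.
 * The names `dt` and `build_opts` below are placeholders assumed for the example:
 *
 *   CLBuildOptions build_opts;
 *   build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt));
 *   build_opts.add_option("-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(dt));
 *   build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_dot8_acc_type_from_data_type(dt));
 */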

/** Get the size of a data type in number of bits.
 *
 * @param[in] dt @ref DataType.
 *
 * @return Number of bits in the data type specified.
 */
std::string get_data_size_from_data_type(const DataType &dt);

/** Helper function to get the GPU target from CL device
 *
 * @param[in] device A CL device
 *
 * @return the GPU target
 */
GPUTarget get_target_from_device(const cl::Device &device);

/** Helper function to get the highest OpenCL version supported
 *
 * @param[in] device A CL device
 *
 * @return the highest OpenCL version supported
 */
CLVersion get_cl_version(const cl::Device &device);

/** Helper function to get the cl_image pitch alignment in pixels
 *
 * @param[in] device A CL device
 *
 * @return the cl_image pitch alignment in pixels. If an error occurs, the function will return 0
 */
size_t get_cl_image_pitch_alignment(const cl::Device &device);

/** Helper function to check whether a given extension is supported
 *
 * @param[in] device         A CL device
 * @param[in] extension_name Name of the extension to be checked
 *
 * @return True if the extension is supported
 */
bool device_supports_extension(const cl::Device &device, const char *extension_name);

/** Helper function to check whether the cl_khr_fp16 extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool fp16_supported(const cl::Device &device);
/** Helper function to check whether the arm_non_uniform_work_group_size extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool arm_non_uniform_workgroup_supported(const cl::Device &device);
/** Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool dot8_supported(const cl::Device &device);

/** Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool dot8_acc_supported(const cl::Device &device);

/** This function checks if the Winograd configuration (defined through the output tile, kernel size and the data layout) is supported on OpenCL
 *
 * @param[in] output_tile Output tile for the Winograd filtering algorithm
 * @param[in] kernel_size Kernel size for the Winograd filtering algorithm
 * @param[in] data_layout Data layout of the input tensor
 *
 * @return True if the configuration is supported
 */
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout);

/** Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors
 *
 * @param[in] device A CL device
 * @param[in] dt     Data type
 *
 * @return preferred vector width
 */
size_t preferred_vector_width(const cl::Device &device, DataType dt);

/** Helper function to check whether "dummy work-items" are preferred in order to dispatch a power-of-two NDRange.
 * When dummy work-items are enabled, it is the OpenCL kernel's responsibility to check whether the work-item is out of range.
 *
 * @param[in] device A CL device
 *
 * @return True if dummy work-items should be preferred to dispatch the NDRange
 */
bool preferred_dummy_work_items_support(const cl::Device &device);
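
/* Illustrative sketch (not part of this header's API): the capability queries above
 * are typically combined to select a kernel variant for the current device. The
 * variable `device` is a placeholder assumed for the example:
 *
 *   const bool use_dot8 = dot8_supported(device) && dot8_acc_supported(device);
 *   const bool use_fp16 = fp16_supported(device);
 *   // When dummy work-items are preferred, the dispatched NDRange is rounded up to a
 *   // power of two and the kernel itself must guard against out-of-range work-items.
 *   const bool pad_ndrange = preferred_dummy_work_items_support(device);
 */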

/** Helper function to check whether the cl_khr_image2d_from_buffer extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool image2d_from_buffer_supported(const cl::Device &device);

/** Creates an OpenCL kernel
 *
 * @param[in] ctx         A context to be used to create the OpenCL kernel.
 * @param[in] kernel_name The kernel name.
 * @param[in] build_opts  The build options to be used for the OpenCL kernel compilation.
 *
 * @return An OpenCL kernel
 */
cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &kernel_name, const CLBuildOptions &build_opts);

/** Creates an OpenCL kernel using a compile context
 *
 * @param[in] ctx         A compile context to be used to create the OpenCL kernel.
 * @param[in] kernel_name The kernel name.
 * @param[in] build_opts  The build options to be used for the OpenCL kernel compilation.
 *
 * @return An OpenCL kernel
 */
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts = std::set<std::string>());

/** Creates a suitable LWS hint object for parallel implementations. Sets the number of work-groups based on the input size.
 * If the input width is smaller than 128, fewer than 8 threads can be used.
 *
 * @param[in] input_dimension Number of elements along the dimension to apply the parallelization
 * @param[in] vector_size     Size of the vector in OpenCL
 *
 * @return An LWS hint object
 */
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size);

} // namespace arm_compute
#endif /* ARM_COMPUTE_CLHELPERS_H */
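
/* Illustrative sketch (not part of this header's API): creating a kernel through a
 * compile context, as declared above. The names `compile_context`, `dt` and the
 * kernel name "copy_tensor" are placeholders assumed for the example:
 *
 *   std::set<std::string> build_opts;
 *   build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(dt));
 *   cl::Kernel kernel = create_kernel(compile_context, "copy_tensor", build_opts);
 */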