• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/CL/CLKernelLibrary.h"
25 
26 #include "arm_compute/core/CL/CLHelpers.h"
27 #include "arm_compute/core/Error.h"
28 #include "arm_compute/core/Utils.h"
29 #include "support/StringSupport.h"
30 
31 #include <algorithm>
32 #include <fstream>
33 #include <iostream>
34 #include <utility>
35 #include <vector>
36 
37 using namespace arm_compute;
38 const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
39 {
40     { "absdiff", "absdiff.cl" },
41     { "accumulate", "accumulate.cl" },
42     { "accumulate_squared", "accumulate.cl" },
43     { "accumulate_weighted", "accumulate.cl" },
44     { "activation_layer", "activation_layer.cl" },
45     { "activation_layer_quant", "activation_layer_quant.cl" },
46     { "activation_layer_quant_f32", "activation_layer_quant.cl" },
47     { "arg_min_max_x", "arg_min_max.cl" },
48     { "arg_min_max_y", "arg_min_max.cl" },
49     { "arg_min_max_z", "arg_min_max.cl" },
50     { "arg_min_max_w", "arg_min_max.cl" },
51     { "batch_to_space_nchw", "batch_to_space.cl" },
52     { "batch_to_space_static_nchw", "batch_to_space.cl" },
53     { "batch_to_space_nhwc", "batch_to_space.cl" },
54     { "batch_to_space_static_nhwc", "batch_to_space.cl" },
55     { "batchnormalization_layer_nchw", "batchnormalization_layer.cl" },
56     { "batchnormalization_layer_nhwc", "batchnormalization_layer.cl" },
57     { "bitwise_or", "bitwise_op.cl" },
58     { "bitwise_and", "bitwise_op.cl" },
59     { "bitwise_xor", "bitwise_op.cl" },
60     { "bitwise_not", "bitwise_op.cl" },
61     { "bounding_box_transform", "bounding_box_transform.cl" },
62     { "bounding_box_transform_quantized", "bounding_box_transform_quantized.cl" },
63     { "channel_combine_NV", "channel_combine.cl" },
64     { "channel_combine_RGB888", "channel_combine.cl" },
65     { "channel_combine_RGBA8888", "channel_combine.cl" },
66     { "channel_combine_UYVY422", "channel_combine.cl" },
67     { "channel_combine_YUYV422", "channel_combine.cl" },
68     { "channel_shuffle_nchw", "channel_shuffle.cl" },
69     { "channel_shuffle_nhwc", "channel_shuffle.cl" },
70     { "channel_extract_NV12", "channel_extract.cl" },
71     { "channel_extract_NV21", "channel_extract.cl" },
72     { "channel_extract_RGB888", "channel_extract.cl" },
73     { "channel_extract_RGBA8888", "channel_extract.cl" },
74     { "channel_extract_UYVY422", "channel_extract.cl" },
75     { "channel_extract_YUYV422", "channel_extract.cl" },
76     { "combine_gradients_L1", "canny.cl" },
77     { "combine_gradients_L2", "canny.cl" },
78     { "compare_equal", "comparisons.cl" },
79     { "compare_equal_quantized", "comparisons.cl" },
80     { "compare_notequal", "comparisons.cl" },
81     { "compare_notequal_quantized", "comparisons.cl" },
82     { "compare_greater", "comparisons.cl" },
83     { "compare_greater_quantized", "comparisons.cl" },
84     { "compare_greaterequal", "comparisons.cl" },
85     { "compare_greaterequal_quantized", "comparisons.cl" },
86     { "compare_less", "comparisons.cl" },
87     { "compare_less_quantized", "comparisons.cl" },
88     { "compare_lessequal", "comparisons.cl" },
89     { "compare_lessequal_quantized", "comparisons.cl" },
90     { "concatenate", "concatenate.cl" },
91     { "concatenate_width", "concatenate.cl" },
92     { "concatenate_height", "concatenate.cl" },
93     { "concatenate_width_x2", "concatenate.cl" },
94     { "concatenate_width_x4", "concatenate.cl" },
95     { "convolution_rectangle", "convolution_rectangle.cl" },
96     { "col2im", "col2im.cl" },
97     { "convert_depth_down", "depth_convert.cl" },
98     { "convert_depth_up", "depth_convert.cl" },
99     { "convert_fc_weights", "convert_fc_weights.cl" },
100     { "convolution3x3_static", "convolution3x3.cl" },
101     { "convolution5x5_static", "convolution5x5.cl" },
102     { "convolution7x7_static", "convolution7x7.cl" },
103     { "convolution9x9_static", "convolution9x9.cl" },
104     { "convolution_separable1x5_static", "convolution5x5.cl" },
105     { "convolution_separable5x1_static", "convolution5x5.cl" },
106     { "convolution_separable1x7_static", "convolution7x7.cl" },
107     { "convolution_separable7x1_static", "convolution7x7.cl" },
108     { "convolution_separable1x9_static", "convolution9x9.cl" },
109     { "convolution_separable9x1_static", "convolution9x9.cl" },
110     { "copy_tensor", "copy_tensor.cl" },
111     { "copy_plane", "channel_extract.cl" },
112     { "copy_planes_3p", "channel_combine.cl" },
113     { "copy_to_keypoint", "fast_corners.cl" },
114     { "crop_tensor", "crop_tensor.cl" },
115     { "deconvolution_reshape", "deconvolution_layer.cl" },
116     { "deconvolution_upsample", "deconvolution_layer.cl" },
117     { "depthwise_convolution_3x3", "depthwise_convolution.cl" },
118     { "depthwise_convolution_3x3_f16", "depthwise_convolution.cl" },
119     { "depthwise_convolution_3x3_nhwc", "depthwise_convolution.cl" },
120     { "depthwise_convolution_3x3_nhwc_stride1", "depthwise_convolution.cl" },
121     { "dwc_MxN_native_fp_nhwc", "depthwise_convolution.cl" },
122     { "dwc_MxN_native_quantized8_nhwc", "depthwise_convolution_quantized.cl" },
123     { "dwc_3x3_native_quantized8_nchw", "depthwise_convolution_quantized.cl" },
124     { "dwc_3x3_native_quantized8_dot8_nchw", "depthwise_convolution_quantized.cl" },
125     { "dwc_3x3_reshaped_quantized8_nhwc", "depthwise_convolution_quantized.cl" },
126     { "dwc_3x3_reshaped_quantized8_stride1_nhwc", "depthwise_convolution_quantized.cl" },
127     { "dwc_3x3_reshaped_quantized8_dot8_stride1_nhwc", "depthwise_convolution_quantized.cl" },
128     { "depth_to_space_nchw", "depth_to_space.cl" },
129     { "depth_to_space_nhwc", "depth_to_space.cl" },
130     { "depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16", "depthwise_convolution.cl" },
131     { "depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16", "depthwise_convolution.cl" },
132     { "depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32", "depthwise_convolution.cl" },
133     { "depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32", "depthwise_convolution.cl" },
134     { "depthwise_convolution_reshape_weights", "depthwise_convolution.cl" },
135     { "dequantization_layer", "dequantization_layer.cl" },
136     { "dequantization_layer_per_channel_nhwc", "dequantization_layer.cl" },
137     { "dequantization_layer_per_channel_nchw", "dequantization_layer.cl" },
138     { "derivative", "derivative.cl" },
139     { "dilate", "dilate.cl" },
140     { "direct_convolution1x1", "direct_convolution1x1.cl" },
141     { "direct_convolution1x1_nhwc", "direct_convolution1x1.cl" },
142     { "direct_convolution1x1_f32_bifrost", "direct_convolution1x1.cl" },
143     { "direct_convolution3x3", "direct_convolution3x3.cl" },
144     { "direct_convolution3x3_nhwc", "direct_convolution3x3.cl" },
145     { "direct_convolution3x3_f32_bifrost", "direct_convolution3x3.cl" },
146     { "direct_convolution5x5", "direct_convolution5x5.cl" },
147     { "direct_convolution5x5_nhwc", "direct_convolution5x5.cl" },
148     { "direct_convolution5x5_f32_bifrost", "direct_convolution5x5.cl" },
149     { "direct_convolution_quantized", "direct_convolution_quantized.cl" },
150     { "direct_convolution9x9_nhwc", "direct_convolution9x9.cl" },
151     { "elementwise_operation_ADD", "elementwise_operation.cl" },
152     { "elementwise_operation_SUB", "elementwise_operation.cl" },
153     { "elementwise_operation_MAX", "elementwise_operation.cl" },
154     { "elementwise_operation_MIN", "elementwise_operation.cl" },
155     { "elementwise_operation_DIV", "elementwise_operation.cl" },
156     { "elementwise_operation_SQUARED_DIFF", "elementwise_operation.cl" },
157     { "elementwise_operation_POWER", "elementwise_operation.cl" },
158     { "elementwise_operation_PRELU", "elementwise_operation.cl" },
159     { "elementwise_operation_AND", "elementwise_operation.cl" },
160     { "elementwise_operation_OR", "elementwise_operation.cl" },
161     { "elementwise_operation_ADD_quantized", "elementwise_operation_quantized.cl" },
162     { "elementwise_operation_SUB_quantized", "elementwise_operation_quantized.cl" },
163     { "elementwise_operation_MAX_quantized", "elementwise_operation_quantized.cl" },
164     { "elementwise_operation_MIN_quantized", "elementwise_operation_quantized.cl" },
165     { "elementwise_operation_DIV_quantized", "elementwise_operation_quantized.cl" },
166     { "elementwise_operation_SQUARED_DIFF_quantized", "elementwise_operation_quantized.cl" },
167     { "elementwise_operation_PRELU_quantized", "elementwise_operation_quantized.cl" },
168     { "elementwise_unary", "elementwise_unary.cl" },
169     { "erode", "erode.cl" },
170     { "fast_corners", "fast_corners.cl" },
171     { "fft_digit_reverse_axis_0", "fft_digit_reverse.cl" },
172     { "fft_digit_reverse_axis_1", "fft_digit_reverse.cl" },
173     { "fft_radix_2_first_stage_axis_0", "fft.cl" },
174     { "fft_radix_2_first_stage_axis_1", "fft.cl" },
175     { "fft_radix_2_axis_0", "fft.cl" },
176     { "fft_radix_2_axis_1", "fft.cl" },
177     { "fft_radix_3_first_stage_axis_0", "fft.cl" },
178     { "fft_radix_3_first_stage_axis_1", "fft.cl" },
179     { "fft_radix_3_axis_0", "fft.cl" },
180     { "fft_radix_3_axis_1", "fft.cl" },
181     { "fft_radix_4_first_stage_axis_0", "fft.cl" },
182     { "fft_radix_4_first_stage_axis_1", "fft.cl" },
183     { "fft_radix_4_axis_0", "fft.cl" },
184     { "fft_radix_4_axis_1", "fft.cl" },
185     { "fft_radix_5_first_stage_axis_0", "fft.cl" },
186     { "fft_radix_5_first_stage_axis_1", "fft.cl" },
187     { "fft_radix_5_axis_0", "fft.cl" },
188     { "fft_radix_5_axis_1", "fft.cl" },
189     { "fft_radix_7_first_stage_axis_0", "fft.cl" },
190     { "fft_radix_7_first_stage_axis_1", "fft.cl" },
191     { "fft_radix_7_axis_0", "fft.cl" },
192     { "fft_radix_7_axis_1", "fft.cl" },
193     { "fft_radix_8_first_stage_axis_0", "fft.cl" },
194     { "fft_radix_8_first_stage_axis_1", "fft.cl" },
195     { "fft_radix_8_axis_0", "fft.cl" },
196     { "fft_radix_8_axis_1", "fft.cl" },
197     { "fft_scale_conj", "fft_scale.cl" },
198     { "fill_image_borders_constant", "fill_border.cl" },
199     { "fill_image_borders_replicate", "fill_border.cl" },
200     { "finalize", "optical_flow_pyramid_lk.cl" },
201     { "flatten", "flatten.cl" },
202     { "floor_layer", "floor.cl" },
203     { "fuse_batchnormalization_layer", "batchnormalization_layer.cl" },
204     { "gather", "gather.cl" },
205     { "gaussian1x5_sub_x", "gaussian_pyramid.cl" },
206     { "gaussian5x1_sub_y", "gaussian_pyramid.cl" },
207     { "gemm_ma_f16", "gemm.cl" },
208     { "gemm_ma_f32", "gemm.cl" },
209     { "gemm_mv", "gemv.cl" },
210     { "gemm_mv_quantized", "gemv.cl" },
211     { "gemm_mm_interleaved_transposed_f16", "gemm_v1.cl" },
212     { "gemm_mm_interleaved_transposed_f16_acc32", "gemm_v1.cl" },
213     { "gemm_mm_interleaved_transposed_f16_bifrost", "gemm_v1.cl" },
214     { "gemm_mm_interleaved_transposed_f32", "gemm_v1.cl" },
215     { "gemm_mm_interleaved_transposed_f32_bifrost", "gemm_v1.cl" },
216     { "gemm_mm_floating_point", "gemm_v1.cl" },
217     { "gemm_mm_floating_point_f16_bifrost", "gemm_v1.cl" },
218     { "gemm_mm_floating_point_f16_bifrost_acc32", "gemm_v1.cl" },
219     { "gemm_mm_floating_point_f32_bifrost", "gemm_v1.cl" },
220     { "gemm_mm_floating_point_f32_bifrost_1000", "gemm_v1.cl" },
221     { "gemm_mm_native", "gemm.cl" },
222     { "gemm_mm_reshaped_lhs_nt_rhs_t", "gemm.cl" },
223     { "gemm_mm_reshaped_lhs_nt_rhs_t_texture", "gemm.cl" },
224     { "gemm_mm_reshaped_lhs_t_rhs_nt", "gemm.cl" },
225     { "gemm_mm_reshaped_lhs_t_rhs_nt_texture", "gemm.cl" },
226     { "gemm_mm_reshaped_only_rhs_nt", "gemm.cl" },
227     { "gemm_mm_reshaped_only_rhs_nt_texture", "gemm.cl" },
228     { "gemm_mm_reshaped_only_rhs_t", "gemm.cl" },
229     { "gemm_mm_reshaped_only_rhs_t_texture", "gemm.cl" },
230     { "gemm_lc_vm_f32", "gemm.cl" },
231     { "gemm_reshape_lhs_matrix_nt", "gemm.cl" },
232     { "gemm_reshape_lhs_matrix_t", "gemm.cl" },
233     { "gemm_reshape_rhs_matrix_nt", "gemm.cl" },
234     { "gemm_reshape_rhs_matrix_t", "gemm.cl" },
235     { "gemmlowp_matrix_a_reduction", "gemmlowp.cl" },
236     { "gemmlowp_matrix_a_reduction_dot8", "gemmlowp.cl" },
237     { "gemmlowp_matrix_b_reduction", "gemmlowp.cl" },
238     { "gemmlowp_mm_native", "gemmlowp.cl" },
239     { "gemmlowp_mm_reshaped_lhs_nt_rhs_t", "gemmlowp.cl" },
240     { "gemmlowp_mm_reshaped_only_rhs_t", "gemmlowp.cl" },
241     { "gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint", "gemmlowp.cl" },
242     { "gemmlowp_offset_contribution", "gemmlowp.cl" },
243     { "gemmlowp_offset_contribution_quantize_down", "gemmlowp.cl" },
244     { "gemmlowp_offset_contribution_quantize_down_fixedpoint", "gemmlowp.cl" },
245     { "gemmlowp_output_stage_quantize_down", "gemmlowp.cl" },
246     { "gemmlowp_output_stage_quantize_down_fixedpoint", "gemmlowp.cl" },
247     { "gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16", "gemmlowp.cl" },
248     { "gemmlowp_output_stage_quantize_down_float", "gemmlowp.cl" },
249     { "generate_proposals_compute_all_anchors", "generate_proposals.cl" },
250     { "generate_proposals_compute_all_anchors_quantized", "generate_proposals_quantized.cl" },
251     { "harris_score_3x3", "harris_corners.cl" },
252     { "harris_score_5x5", "harris_corners.cl" },
253     { "harris_score_7x7", "harris_corners.cl" },
254     { "hist_border_kernel", "histogram.cl" },
255     { "hist_border_kernel_fixed", "histogram.cl" },
256     { "hist_local_kernel", "histogram.cl" },
257     { "hist_local_kernel_fixed", "histogram.cl" },
258     { "hog_block_normalization", "hog.cl" },
259     { "hog_detector", "hog.cl" },
260     { "hog_orientation_binning", "hog.cl" },
261     { "hysteresis", "canny.cl" },
262     { "im2col1x1_stridex1_nchw", "im2col.cl" },
263     { "im2col3x3_nchw", "im2col.cl" },
264     { "im2col5x5_nchw", "im2col.cl" },
265     { "im2col11x11_padx0_pady0_nchw", "im2col.cl" },
266     { "im2col_generic_nchw", "im2col.cl" },
267     { "im2col_generic_padx0_pady0_nchw", "im2col.cl" },
268     { "im2col3x3_nhwc", "im2col.cl" },
269     { "im2col9x9_nhwc", "im2col.cl" },
270     { "im2col_generic_nhwc", "im2col.cl" },
271     { "init_level", "optical_flow_pyramid_lk.cl" },
272     { "init_level_max", "optical_flow_pyramid_lk.cl" },
273     { "init_level_max_initial_estimate", "optical_flow_pyramid_lk.cl" },
274     { "instance_normalization", "instance_normalization.cl" },
275     { "integral_horizontal", "integral_image.cl" },
276     { "integral_vertical", "integral_image.cl" },
277     { "IYUV_to_NV12_bt709", "color_convert.cl" },
278     { "IYUV_to_RGB888_bt709", "color_convert.cl" },
279     { "IYUV_to_RGBA8888_bt709", "color_convert.cl" },
280     { "IYUV_to_YUV444_bt709", "color_convert.cl" },
281     { "l2_normalize_x", "l2_normalize.cl" },
282     { "l2_normalize_y", "l2_normalize.cl" },
283     { "l2_normalize_z", "l2_normalize.cl" },
284     { "lktracker_stage0", "optical_flow_pyramid_lk.cl" },
285     { "lktracker_stage1", "optical_flow_pyramid_lk.cl" },
286     { "magnitude_phase", "magnitude_phase.cl" },
287     { "max_unpooling_layer_2", "unpooling_layer.cl" },
288     { "mean_stddev_accumulate", "mean_stddev.cl" },
289     { "mean_stddev_normalization", "mean_stddev_normalization.cl" },
290     { "memset", "memset.cl" },
291     { "minmax", "minmaxloc.cl" },
292     { "minmax_border", "minmaxloc.cl" },
293     { "minmax_layer", "minmax_layer.cl" },
294     { "minmaxloc", "minmaxloc.cl" },
295     { "non_linear_filter_box3x3", "non_linear_filter3x3.cl" },
296     { "non_linear_filter_cross3x3", "non_linear_filter3x3.cl" },
297     { "non_linear_filter_disk3x3", "non_linear_filter3x3.cl" },
298     { "non_linear_filter_box5x5", "non_linear_filter5x5.cl" },
299     { "non_linear_filter_cross5x5", "non_linear_filter5x5.cl" },
300     { "non_linear_filter_disk5x5", "non_linear_filter5x5.cl" },
301     { "non_max_suppression", "nonmax.cl" },
302     { "normalization_layer_cross_map", "normalization_layer.cl" },
303     { "normalization_layer_in_map_nchw", "normalization_layer.cl" },
304     { "normalization_layer_in_map_nhwc", "normalization_layer.cl" },
305     { "normalize_planar_yuv_layer_nchw", "normalize_planar_yuv_layer.cl" },
306     { "normalize_planar_yuv_layer_nhwc", "normalize_planar_yuv_layer.cl" },
307     { "normalize_planar_yuv_layer_q8_nchw", "normalize_planar_yuv_layer_quantized.cl" },
308     { "normalize_planar_yuv_layer_q8_nhwc", "normalize_planar_yuv_layer_quantized.cl" },
309     { "NV12_to_IYUV_bt709", "color_convert.cl" },
310     { "NV12_to_RGB888_bt709", "color_convert.cl" },
311     { "NV12_to_RGBA8888_bt709", "color_convert.cl" },
312     { "NV12_to_YUV444_bt709", "color_convert.cl" },
313     { "NV21_to_IYUV_bt709", "color_convert.cl" },
314     { "NV21_to_RGB888_bt709", "color_convert.cl" },
315     { "NV21_to_RGBA8888_bt709", "color_convert.cl" },
316     { "NV21_to_YUV444_bt709", "color_convert.cl" },
317     { "pad_layer_constant", "pad_layer.cl" },
318     { "pad_layer_symmetric_reflect", "pad_layer.cl" },
319     { "permute", "permute.cl" },
320     { "pixelwise_mul_complex", "pixelwise_mul_float.cl" },
321     { "pixelwise_mul_float", "pixelwise_mul_float.cl" },
322     { "pixelwise_mul_int", "pixelwise_mul_int.cl" },
323     { "pixelwise_mul_quantized", "pixelwise_mul_int.cl" },
324     { "pooling_layer_2", "pooling_layer.cl" },
325     { "pooling_layer_3", "pooling_layer.cl" },
326     { "pooling_layer_optimized_3", "pooling_layer.cl" },
327     { "pooling_layer_7", "pooling_layer.cl" },
328     { "pooling_layer_MxN_nchw", "pooling_layer.cl" },
329     { "pooling_layer_MxN_nhwc", "pooling_layer.cl" },
330     { "pooling_layer_2x2_nhwc", "pooling_layer.cl" },
331     { "pooling_layer_2_nchw_indices_fp32", "pooling_layer.cl" },
332     { "pooling_layer_2_nchw_indices_fp16", "pooling_layer.cl" },
333     { "pooling_layer_MxN_quantized_nhwc", "pooling_layer_quantized.cl" },
334     { "pooling_layer_MxN_quantized_nchw", "pooling_layer_quantized.cl" },
335     { "prior_box_layer_nchw", "prior_box_layer.cl" },
336     { "qlstm_layer_normalization", "qlstm_layer_normalization.cl" },
337     { "quantization_layer", "quantization_layer.cl" },
338     { "range", "range.cl" },
339     { "range_quantized", "range.cl" },
340     { "reduction_operation_x", "reduction_operation.cl" },
341     { "reduction_operation_non_parallel_x", "reduction_operation.cl" },
342     { "reduction_operation_y", "reduction_operation.cl" },
343     { "reduction_operation_z", "reduction_operation.cl" },
344     { "reduction_operation_w", "reduction_operation.cl" },
345     { "remap_nearest_neighbour", "remap.cl" },
346     { "remap_bilinear", "remap.cl" },
347     { "reorg_layer_nchw", "reorg_layer.cl" },
348     { "reorg_layer_nhwc", "reorg_layer.cl" },
349     { "reshape_layer", "reshape_layer.cl" },
350     { "reshape_to_columns", "convolution_layer.cl" },
351     { "reverse", "reverse.cl" },
352     { "RGB888_to_IYUV_bt709", "color_convert.cl" },
353     { "RGB888_to_NV12_bt709", "color_convert.cl" },
354     { "RGB888_to_RGBA8888_bt709", "color_convert.cl" },
355     { "RGB888_to_U8_bt709", "color_convert.cl" },
356     { "RGB888_to_YUV444_bt709", "color_convert.cl" },
357     { "RGBA8888_to_IYUV_bt709", "color_convert.cl" },
358     { "RGBA8888_to_NV12_bt709", "color_convert.cl" },
359     { "RGBA8888_to_RGB888_bt709", "color_convert.cl" },
360     { "RGBA8888_to_YUV444_bt709", "color_convert.cl" },
361     { "roi_align_layer", "roi_align_layer.cl" },
362     { "roi_align_layer_quantized", "roi_align_layer_quantized.cl" },
363     { "roi_pooling_layer", "roi_pooling_layer.cl" },
364     { "scale_nearest_neighbour_nchw", "scale.cl" },
365     { "scale_nearest_neighbour_nhwc", "scale.cl" },
366     { "scale_bilinear_nchw", "scale.cl" },
367     { "scale_bilinear_nhwc", "scale.cl" },
368     { "scale_bilinear_quantized_nchw", "scale_quantized.cl" },
369     { "scale_bilinear_quantized_nhwc", "scale_quantized.cl" },
370     { "scharr3x3", "scharr_filter.cl" },
371     { "select_same_rank", "select.cl" },
372     { "select_different_rank_2", "select.cl" },
373     { "select_different_rank_n", "select.cl" },
374     { "sobel3x3", "sobel_filter.cl" },
375     { "sobel_separable5x1", "sobel_filter.cl" },
376     { "sobel_separable1x5", "sobel_filter.cl" },
377     { "sobel_separable7x1", "sobel_filter.cl" },
378     { "sobel_separable1x7", "sobel_filter.cl" },
379     { "softmax_layer_norm", "softmax_layer.cl" },
380     { "softmax_layer_norm_quantized", "softmax_layer_quantized.cl" },
381     { "softmax_layer_max_shift_exp_sum_quantized_serial", "softmax_layer_quantized.cl" },
382     { "softmax_layer_max_shift_exp_sum_quantized_parallel", "softmax_layer_quantized.cl" },
383     { "softmax_layer_max_shift_exp_sum_serial", "softmax_layer.cl" },
384     { "space_to_batch_nchw", "space_to_batch.cl" },
385     { "space_to_batch_static_nchw", "space_to_batch.cl" },
386     { "space_to_batch_nhwc", "space_to_batch.cl" },
387     { "space_to_batch_static_nhwc", "space_to_batch.cl" },
388     { "space_to_depth_nchw", "space_to_depth.cl" },
389     { "space_to_depth_nhwc", "space_to_depth.cl" },
390     { "softmax_layer_max_shift_exp_sum_parallel", "softmax_layer.cl" },
391     { "stack_layer", "stack_layer.cl" },
392     { "strided_slice", "slice_ops.cl" },
393     { "suppress_non_maximum", "canny.cl" },
394     { "tablelookup_U8", "tablelookup.cl" },
395     { "tablelookup_S16", "tablelookup.cl" },
396     { "threshold_binary", "threshold.cl" },
397     { "threshold_range", "threshold.cl" },
398     { "tile", "tile.cl" },
399     { "transpose", "transpose.cl" },
400     { "UYVY422_to_IYUV_bt709", "color_convert.cl" },
401     { "UYVY422_to_NV12_bt709", "color_convert.cl" },
402     { "UYVY422_to_RGB888_bt709", "color_convert.cl" },
403     { "UYVY422_to_RGBA8888_bt709", "color_convert.cl" },
404     { "upsample_layer_nchw", "upsample_layer.cl" },
405     { "upsample_layer_nhwc", "upsample_layer.cl" },
406     { "warp_affine_nearest_neighbour", "warp_affine.cl" },
407     { "warp_affine_bilinear", "warp_affine.cl" },
408     { "warp_perspective_nearest_neighbour", "warp_perspective.cl" },
409     { "warp_perspective_bilinear", "warp_perspective.cl" },
410     { "winograd_filter_transform_2x2_3x3_nchw", "winograd_filter_transform.cl" },
411     { "winograd_filter_transform_2x1_3x1_nchw", "winograd_filter_transform.cl" },
412     { "winograd_filter_transform_1x2_1x3_nchw", "winograd_filter_transform.cl" },
413     { "winograd_filter_transform_4x4_3x3_nchw", "winograd_filter_transform.cl" },
414     { "winograd_filter_transform_4x1_3x1_nchw", "winograd_filter_transform.cl" },
415     { "winograd_filter_transform_1x4_1x3_nchw", "winograd_filter_transform.cl" },
416     { "winograd_filter_transform_4x4_5x5_nchw", "winograd_filter_transform.cl" },
417     { "winograd_filter_transform_4x1_5x1_nchw", "winograd_filter_transform.cl" },
418     { "winograd_filter_transform_1x4_1x5_nchw", "winograd_filter_transform.cl" },
419     { "winograd_filter_transform_4x1_3x1_nhwc", "winograd_filter_transform.cl" },
420     { "winograd_filter_transform_1x4_1x3_nhwc", "winograd_filter_transform.cl" },
421     { "winograd_filter_transform_4x4_3x3_nhwc", "winograd_filter_transform.cl" },
422     { "winograd_filter_transform_4x4_5x5_nhwc", "winograd_filter_transform.cl" },
423     { "winograd_filter_transform_4x1_5x1_nhwc", "winograd_filter_transform.cl" },
424     { "winograd_filter_transform_1x4_1x5_nhwc", "winograd_filter_transform.cl" },
425     { "winograd_filter_transform_2x2_7x7_nhwc", "winograd_filter_transform.cl" },
426     { "winograd_filter_transform_2x1_7x1_nhwc", "winograd_filter_transform.cl" },
427     { "winograd_filter_transform_1x2_1x7_nhwc", "winograd_filter_transform.cl" },
428     { "winograd_input_transform_2x2_3x3_stepz1_nchw", "winograd_input_transform.cl" },
429     { "winograd_input_transform_2x2_3x3_stepz2_nchw", "winograd_input_transform.cl" },
430     { "winograd_input_transform_2x1_3x1_stepz1_nchw", "winograd_input_transform.cl" },
431     { "winograd_input_transform_2x1_3x1_stepz2_nchw", "winograd_input_transform.cl" },
432     { "winograd_input_transform_1x2_1x3_stepz1_nchw", "winograd_input_transform.cl" },
433     { "winograd_input_transform_1x2_1x3_stepz2_nchw", "winograd_input_transform.cl" },
434     { "winograd_input_transform_4x4_3x3_stepz1_nchw", "winograd_input_transform.cl" },
435     { "winograd_input_transform_4x1_3x1_stepz1_nchw", "winograd_input_transform.cl" },
436     { "winograd_input_transform_1x4_1x3_stepz1_nchw", "winograd_input_transform.cl" },
437     { "winograd_input_transform_4x4_5x5_stepz1_nchw", "winograd_input_transform.cl" },
438     { "winograd_input_transform_4x1_5x1_stepz1_nchw", "winograd_input_transform.cl" },
439     { "winograd_input_transform_1x4_1x5_stepz1_nchw", "winograd_input_transform.cl" },
440     { "winograd_input_transform_4x1_3x1_stepz1_nhwc", "winograd_input_transform.cl" },
441     { "winograd_input_transform_1x4_1x3_stepz1_nhwc", "winograd_input_transform.cl" },
442     { "winograd_input_transform_4x4_3x3_stepz1_nhwc", "winograd_input_transform.cl" },
443     { "winograd_input_transform_4x4_5x5_stepz1_nhwc", "winograd_input_transform.cl" },
444     { "winograd_input_transform_4x1_5x1_stepz1_nhwc", "winograd_input_transform.cl" },
445     { "winograd_input_transform_1x4_1x5_stepz1_nhwc", "winograd_input_transform.cl" },
446     { "winograd_input_transform_2x2_7x7_stepz1_nhwc", "winograd_input_transform.cl" },
447     { "winograd_input_transform_2x1_7x1_stepz1_nhwc", "winograd_input_transform.cl" },
448     { "winograd_input_transform_1x2_1x7_stepz1_nhwc", "winograd_input_transform.cl" },
449     { "winograd_output_transform_2x2_3x3_nchw", "winograd_output_transform.cl" },
450     { "winograd_output_transform_2x1_3x1_nchw", "winograd_output_transform.cl" },
451     { "winograd_output_transform_1x2_1x3_nchw", "winograd_output_transform.cl" },
452     { "winograd_output_transform_4x4_3x3_nchw", "winograd_output_transform.cl" },
453     { "winograd_output_transform_4x1_3x1_nchw", "winograd_output_transform.cl" },
454     { "winograd_output_transform_1x4_1x3_nchw", "winograd_output_transform.cl" },
455     { "winograd_output_transform_4x4_5x5_nchw", "winograd_output_transform.cl" },
456     { "winograd_output_transform_4x1_5x1_nchw", "winograd_output_transform.cl" },
457     { "winograd_output_transform_1x4_1x5_nchw", "winograd_output_transform.cl" },
458     { "winograd_output_transform_4x1_3x1_nhwc", "winograd_output_transform.cl" },
459     { "winograd_output_transform_1x4_1x3_nhwc", "winograd_output_transform.cl" },
460     { "winograd_output_transform_4x4_3x3_nhwc", "winograd_output_transform.cl" },
461     { "winograd_output_transform_4x4_5x5_nhwc", "winograd_output_transform.cl" },
462     { "winograd_output_transform_4x1_5x1_nhwc", "winograd_output_transform.cl" },
463     { "winograd_output_transform_1x4_1x5_nhwc", "winograd_output_transform.cl" },
464     { "winograd_output_transform_2x2_7x7_nhwc", "winograd_output_transform.cl" },
465     { "winograd_output_transform_2x1_7x1_nhwc", "winograd_output_transform.cl" },
466     { "winograd_output_transform_1x2_1x7_nhwc", "winograd_output_transform.cl" },
467     { "yolo_layer_nchw", "yolo_layer.cl" },
468     { "yolo_layer_nhwc", "yolo_layer.cl" },
469     { "YUYV422_to_IYUV_bt709", "color_convert.cl" },
470     { "YUYV422_to_NV12_bt709", "color_convert.cl" },
471     { "YUYV422_to_RGB888_bt709", "color_convert.cl" },
472     { "YUYV422_to_RGBA8888_bt709", "color_convert.cl" },
473 };
474 
475 const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
476 {
477 #ifdef EMBEDDED_KERNELS
478     {
479         "absdiff.cl",
480 #include "./cl_kernels/absdiff.clembed"
481     },
482     {
483         "accumulate.cl",
484 #include "./cl_kernels/accumulate.clembed"
485     },
486     {
487         "activation_layer.cl",
488 #include "./cl_kernels/activation_layer.clembed"
489     },
490     {
491         "activation_layer_quant.cl",
492 #include "./cl_kernels/activation_layer_quant.clembed"
493     },
494     {
495         "arg_min_max.cl",
496 #include "./cl_kernels/arg_min_max.clembed"
497     },
498     {
499         "batch_to_space.cl",
500 #include "./cl_kernels/batch_to_space.clembed"
501     },
502     {
503         "bitwise_op.cl",
504 #include "./cl_kernels/bitwise_op.clembed"
505     },
506     {
507         "bounding_box_transform.cl",
508 #include "./cl_kernels/bounding_box_transform.clembed"
509     },
510     {
511         "bounding_box_transform_quantized.cl",
512 #include "./cl_kernels/bounding_box_transform_quantized.clembed"
513     },
514     {
515         "canny.cl",
516 #include "./cl_kernels/canny.clembed"
517     },
518     {
519         "channel_combine.cl",
520 #include "./cl_kernels/channel_combine.clembed"
521     },
522     {
523         "channel_extract.cl",
524 #include "./cl_kernels/channel_extract.clembed"
525     },
526     {
527         "channel_shuffle.cl",
528 #include "./cl_kernels/channel_shuffle.clembed"
529     },
530     {
531         "col2im.cl",
532 #include "./cl_kernels/col2im.clembed"
533     },
534     {
535         "comparisons.cl",
536 #include "./cl_kernels/comparisons.clembed"
537     },
538     {
539         "concatenate.cl",
540 #include "./cl_kernels/concatenate.clembed"
541     },
542     {
543         "color_convert.cl",
544 #include "./cl_kernels/color_convert.clembed"
545     },
546     {
547         "convert_fc_weights.cl",
548 #include "./cl_kernels/convert_fc_weights.clembed"
549     },
550     {
551         "convolution3x3.cl",
552 #include "./cl_kernels/convolution3x3.clembed"
553     },
554     {
555         "convolution5x5.cl",
556 #include "./cl_kernels/convolution5x5.clembed"
557     },
558     {
559         "convolution7x7.cl",
560 #include "./cl_kernels/convolution7x7.clembed"
561     },
562     {
563         "convolution9x9.cl",
564 #include "./cl_kernels/convolution9x9.clembed"
565     },
566     {
567         "convolution_layer.cl",
568 #include "./cl_kernels/convolution_layer.clembed"
569     },
570     {
571         "convolution_rectangle.cl",
572 #include "./cl_kernels/convolution_rectangle.clembed"
573     },
574     {
575         "copy_tensor.cl",
576 #include "./cl_kernels/copy_tensor.clembed"
577     },
578     {
579         "crop_tensor.cl",
580 #include "./cl_kernels/crop_tensor.clembed"
581     },
582     {
583         "upsample_layer.cl",
584 #include "./cl_kernels/upsample_layer.clembed"
585     },
586     {
587         "deconvolution_layer.cl",
588 #include "./cl_kernels/deconvolution_layer.clembed"
589     },
590     {
591         "depth_convert.cl",
592 #include "./cl_kernels/depth_convert.clembed"
593     },
594     {
595         "depth_to_space.cl",
596 #include "./cl_kernels/depth_to_space.clembed"
597     },
598     {
599         "depthwise_convolution.cl",
600 #include "./cl_kernels/depthwise_convolution.clembed"
601     },
602     {
603         "depthwise_convolution_quantized.cl",
604 #include "./cl_kernels/depthwise_convolution_quantized.clembed"
605     },
606     {
607         "dequantization_layer.cl",
608 #include "./cl_kernels/dequantization_layer.clembed"
609     },
610     {
611         "derivative.cl",
612 #include "./cl_kernels/derivative.clembed"
613     },
614     {
615         "dilate.cl",
616 #include "./cl_kernels/dilate.clembed"
617     },
618     {
619         "direct_convolution1x1.cl",
620 #include "./cl_kernels/direct_convolution1x1.clembed"
621     },
622     {
623         "direct_convolution3x3.cl",
624 #include "./cl_kernels/direct_convolution3x3.clembed"
625     },
626     {
627         "direct_convolution5x5.cl",
628 #include "./cl_kernels/direct_convolution5x5.clembed"
629     },
630     {
631         "direct_convolution_quantized.cl",
632 #include "./cl_kernels/direct_convolution_quantized.clembed"
633     },
634     {
635         "direct_convolution9x9.cl",
636 #include "./cl_kernels/direct_convolution9x9.clembed"
637     },
638     {
639         "elementwise_operation.cl",
640 #include "./cl_kernels/elementwise_operation.clembed"
641     },
642     {
643         "elementwise_operation_quantized.cl",
644 #include "./cl_kernels/elementwise_operation_quantized.clembed"
645     },
646     {
647         "elementwise_unary.cl",
648 #include "./cl_kernels/elementwise_unary.clembed"
649     },
650     {
651         "erode.cl",
652 #include "./cl_kernels/erode.clembed"
653     },
654     {
655         "fast_corners.cl",
656 #include "./cl_kernels/fast_corners.clembed"
657     },
658     {
659         "fft.cl",
660 #include "./cl_kernels/fft.clembed"
661     },
662     {
663         "fft_digit_reverse.cl",
664 #include "./cl_kernels/fft_digit_reverse.clembed"
665     },
666     {
667         "fft_scale.cl",
668 #include "./cl_kernels/fft_scale.clembed"
669     },
670     {
671         "fill_border.cl",
672 #include "./cl_kernels/fill_border.clembed"
673     },
674     {
675         "flatten.cl",
676 #include "./cl_kernels/flatten.clembed"
677     },
678     {
679         "floor.cl",
680 #include "./cl_kernels/floor.clembed"
681     },
682     {
683         "gather.cl",
684 #include "./cl_kernels/gather.clembed"
685     },
686     {
687         "gaussian_pyramid.cl",
688 #include "./cl_kernels/gaussian_pyramid.clembed"
689     },
690     {
691         "gemm.cl",
692 #include "./cl_kernels/gemm.clembed"
693     },
694     {
695         "gemm_v1.cl",
696 #include "./cl_kernels/gemm_v1.clembed"
697     },
698     {
699         "gemmlowp.cl",
700 #include "./cl_kernels/gemmlowp.clembed"
701     },
702     {
703         "gemv.cl",
704 #include "./cl_kernels/gemv.clembed"
705     },
706     {
707         "generate_proposals.cl",
708 #include "./cl_kernels/generate_proposals.clembed"
709     },
710     {
711         "generate_proposals_quantized.cl",
712 #include "./cl_kernels/generate_proposals_quantized.clembed"
713     },
714     {
715         "harris_corners.cl",
716 #include "./cl_kernels/harris_corners.clembed"
717     },
718     {
719         "helpers.h",
720 #include "./cl_kernels/helpers.hembed"
721     },
722     {
723         "helpers_asymm.h",
724 #include "./cl_kernels/helpers_asymm.hembed"
725     },
726     {
727         "histogram.cl",
728 #include "./cl_kernels/histogram.clembed"
729     },
730     {
731         "hog.cl",
732 #include "./cl_kernels/hog.clembed"
733     },
734     {
735         "im2col.cl",
736 #include "./cl_kernels/im2col.clembed"
737     },
738     {
739         "instance_normalization.cl",
740 #include "./cl_kernels/instance_normalization.clembed"
741     },
742     {
743         "integral_image.cl",
744 #include "./cl_kernels/integral_image.clembed"
745     },
746     {
747         "l2_normalize.cl",
748 #include "./cl_kernels/l2_normalize.clembed"
749     },
750     {
751         "magnitude_phase.cl",
752 #include "./cl_kernels/magnitude_phase.clembed"
753     },
754     {
755         "mean_stddev.cl",
756 #include "./cl_kernels/mean_stddev.clembed"
757     },
758     {
759         "mean_stddev_normalization.cl",
760 #include "./cl_kernels/mean_stddev_normalization.clembed"
761     },
762     {
763         "memset.cl",
764 #include "./cl_kernels/memset.clembed"
765     },
766     {
767         "minmaxloc.cl",
768 #include "./cl_kernels/minmaxloc.clembed"
769     },
770     {
771         "minmax_layer.cl",
772 #include "./cl_kernels/minmax_layer.clembed"
773     },
774     {
775         "non_linear_filter3x3.cl",
776 #include "./cl_kernels/non_linear_filter3x3.clembed"
777     },
778     {
779         "non_linear_filter5x5.cl",
780 #include "./cl_kernels/non_linear_filter5x5.clembed"
781     },
782     {
783         "non_linear_filter_helpers.h",
784 #include "./cl_kernels/non_linear_filter_helpers.hembed"
785     },
786     {
787         "nonmax.cl",
788 #include "./cl_kernels/nonmax.clembed"
789     },
790     {
791         "normalization_layer.cl",
792 #include "./cl_kernels/normalization_layer.clembed"
793     },
794     {
795         "normalize_planar_yuv_layer.cl",
796 #include "./cl_kernels/normalize_planar_yuv_layer.clembed"
797     },
798     {
799         "normalize_planar_yuv_layer_quantized.cl",
800 #include "./cl_kernels/normalize_planar_yuv_layer_quantized.clembed"
801     },
802     {
803         "batchnormalization_layer.cl",
804 #include "./cl_kernels/batchnormalization_layer.clembed"
805     },
806     {
807         "optical_flow_pyramid_lk.cl",
808 #include "./cl_kernels/optical_flow_pyramid_lk.clembed"
809     },
810     {
811         "pad_layer.cl",
812 #include "./cl_kernels/pad_layer.clembed"
813     },
814     {
815         "permute.cl",
816 #include "./cl_kernels/permute.clembed"
817     },
818     {
819         "pixelwise_mul_float.cl",
820 #include "./cl_kernels/pixelwise_mul_float.clembed"
821     },
822     {
823         "pixelwise_mul_int.cl",
824 #include "./cl_kernels/pixelwise_mul_int.clembed"
825     },
826     {
827         "pooling_layer.cl",
828 #include "./cl_kernels/pooling_layer.clembed"
829     },
830     {
831         "pooling_layer_quantized.cl",
832 #include "./cl_kernels/pooling_layer_quantized.clembed"
833     },
834     {
835         "prior_box_layer.cl",
836 #include "./cl_kernels/prior_box_layer.clembed"
837     },
838     {
839         "qlstm_layer_normalization.cl",
840 #include "./cl_kernels/qlstm_layer_normalization.clembed"
841     },
842     {
843         "quantization_layer.cl",
844 #include "./cl_kernels/quantization_layer.clembed"
845     },
846     {
847         "range.cl",
848 #include "./cl_kernels/range.clembed"
849     },
850     {
851         "reduction_operation.cl",
852 #include "./cl_kernels/reduction_operation.clembed"
853     },
854     {
855         "remap.cl",
856 #include "./cl_kernels/remap.clembed"
857     },
858     {
859         "reorg_layer.cl",
860 #include "./cl_kernels/reorg_layer.clembed"
861     },
862     {
863         "reshape_layer.cl",
864 #include "./cl_kernels/reshape_layer.clembed"
865     },
866     {
867         "reverse.cl",
868 #include "./cl_kernels/reverse.clembed"
869     },
870     {
871         "roi_align_layer.cl",
872 #include "./cl_kernels/roi_align_layer.clembed"
873     },
874     {
875         "roi_align_layer_quantized.cl",
876 #include "./cl_kernels/roi_align_layer_quantized.clembed"
877     },
878     {
879         "roi_pooling_layer.cl",
880 #include "./cl_kernels/roi_pooling_layer.clembed"
881     },
882     {
883         "scale.cl",
884 #include "./cl_kernels/scale.clembed"
885     },
886     {
887         "scale_quantized.cl",
888 #include "./cl_kernels/scale_quantized.clembed"
889     },
890     {
891         "scharr_filter.cl",
892 #include "./cl_kernels/scharr_filter.clembed"
893     },
894     {
895         "select.cl",
896 #include "./cl_kernels/select.clembed"
897     },
898     {
899         "sobel_filter.cl",
900 #include "./cl_kernels/sobel_filter.clembed"
901     },
902     {
903         "softmax_layer.cl",
904 #include "./cl_kernels/softmax_layer.clembed"
905     },
906     {
907         "softmax_layer_quantized.cl",
908 #include "./cl_kernels/softmax_layer_quantized.clembed"
909     },
910     {
911         "slice_ops.cl",
912 #include "./cl_kernels/slice_ops.clembed"
913     },
914     {
915         "space_to_batch.cl",
916 #include "./cl_kernels/space_to_batch.clembed"
917     },
918     {
919         "space_to_depth.cl",
920 #include "./cl_kernels/space_to_depth.clembed"
921     },
922     {
923         "stack_layer.cl",
924 #include "./cl_kernels/stack_layer.clembed"
925     },
926     {
927         "tablelookup.cl",
928 #include "./cl_kernels/tablelookup.clembed"
929     },
930     {
931         "threshold.cl",
932 #include "./cl_kernels/threshold.clembed"
933     },
934     {
935         "tile.cl",
936 #include "./cl_kernels/tile.clembed"
937     },
938     {
939         "transpose.cl",
940 #include "./cl_kernels/transpose.clembed"
941     },
942     {
943         "types.h",
944 #include "./cl_kernels/types.hembed"
945     },
946     {
947         "unpooling_layer.cl",
948 #include "./cl_kernels/unpooling_layer.clembed"
949     },
950     {
951         "warp_affine.cl",
952 #include "./cl_kernels/warp_affine.clembed"
953     },
954     {
955         "warp_helpers.h",
956 #include "./cl_kernels/warp_helpers.hembed"
957     },
958     {
959         "warp_perspective.cl",
960 #include "./cl_kernels/warp_perspective.clembed"
961     },
962     {
963         "winograd_filter_transform.cl",
964 #include "./cl_kernels/winograd_filter_transform.clembed"
965     },
966     {
967         "winograd_input_transform.cl",
968 #include "./cl_kernels/winograd_input_transform.clembed"
969     },
970     {
971         "winograd_output_transform.cl",
972 #include "./cl_kernels/winograd_output_transform.clembed"
973     },
974     {
975         "yolo_layer.cl",
976 #include "./cl_kernels/yolo_layer.clembed"
977     },
978 #endif /* EMBEDDED_KERNELS */
979 };
980 
CLKernelLibrary()981 CLKernelLibrary::CLKernelLibrary()
982     : _compile_context(), _kernel_path()
983 {
984     opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the CLKernelLibrary is built
985 }
986 
get()987 CLKernelLibrary &CLKernelLibrary::get()
988 {
989     static CLKernelLibrary _kernel_library;
990     return _kernel_library;
991 }
992 
create_kernel(const std::string & kernel_name,const std::set<std::string> & build_options_set) const993 Kernel CLKernelLibrary::create_kernel(const std::string &kernel_name, const std::set<std::string> &build_options_set) const
994 {
995     const std::string program_name = get_program_name(kernel_name);
996     auto              program      = get_program(program_name);
997 
998     return _compile_context.create_kernel(kernel_name, program_name, program.first, _kernel_path, build_options_set, program.second);
999 }
1000 
get_program_name(const std::string & kernel_name) const1001 std::string CLKernelLibrary::get_program_name(const std::string &kernel_name) const
1002 {
1003     // Find which program contains the kernel
1004     auto kernel_program_it = _kernel_program_map.find(kernel_name);
1005 
1006     if(_kernel_program_map.end() == kernel_program_it)
1007     {
1008         ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
1009     }
1010 
1011     const std::string program_name = kernel_program_it->second;
1012 
1013     return program_name;
1014 }
1015 
init(std::string kernel_path,cl::Context context,cl::Device device)1016 void CLKernelLibrary::init(std::string kernel_path, cl::Context context, cl::Device device)
1017 {
1018     _compile_context = CLCompileContext(context, device);
1019     _kernel_path     = kernel_path;
1020 }
1021 
set_kernel_path(const std::string & kernel_path)1022 void CLKernelLibrary::set_kernel_path(const std::string &kernel_path)
1023 {
1024     _kernel_path = std::move(kernel_path);
1025 }
1026 
// Returns the cl::Context reference exposed by the internal compile context
// (_compile_context.context()).
context()1027 cl::Context &CLKernelLibrary::context()
1028 {
1029     return _compile_context.context();
1030 }
1031 
// Returns, as a const reference, the cl::Device reported by the internal
// compile context (_compile_context.get_device()).
get_device()1032 const cl::Device &CLKernelLibrary::get_device()
1033 {
1034     return _compile_context.get_device();
1035 }
1036 
// Forwards the given device to the internal compile context
// (_compile_context.set_device()). The device is received by value.
set_device(cl::Device device)1037 void CLKernelLibrary::set_device(cl::Device device)
1038 {
1039     _compile_context.set_device(device);
1040 }
1041 
// Forwards the given context to the internal compile context
// (_compile_context.set_context()). The context is received by value.
set_context(cl::Context context)1042 void CLKernelLibrary::set_context(cl::Context context)
1043 {
1044     _compile_context.set_context(context);
1045 }
1046 
// Returns a copy of the stored kernel path (_kernel_path), i.e. the value
// last assigned by init() or set_kernel_path().
get_kernel_path()1047 std::string CLKernelLibrary::get_kernel_path()
1048 {
1049     return _kernel_path;
1050 }
1051 
// Delegates to _compile_context.clear_programs_cache(); the library keeps no
// program cache of its own in this file.
clear_programs_cache()1052 void CLKernelLibrary::clear_programs_cache()
1053 {
1054     _compile_context.clear_programs_cache();
1055 }
1056 
// Returns, as a const reference, the string -> cl::Program map reported by the
// internal compile context (_compile_context.get_built_programs()).
// NOTE(review): keys are presumably built-program names, matching add_built_program() - confirm.
get_built_programs() const1057 const std::map<std::string, cl::Program> &CLKernelLibrary::get_built_programs() const
1058 {
1059     return _compile_context.get_built_programs();
1060 }
1061 
// Forwards a program and the name it was built under to the internal compile
// context (_compile_context.add_built_program()).
add_built_program(const std::string & built_program_name,const cl::Program & program)1062 void CLKernelLibrary::add_built_program(const std::string &built_program_name, const cl::Program &program)
1063 {
1064     _compile_context.add_built_program(built_program_name, program);
1065 }
1066 
// Returns the value of _compile_context.fp16_supported() unchanged.
fp16_supported() const1067 bool CLKernelLibrary::fp16_supported() const
1068 {
1069     return _compile_context.fp16_supported();
1070 }
1071 
// Returns the value of _compile_context.int64_base_atomics_supported() unchanged.
int64_base_atomics_supported() const1072 bool CLKernelLibrary::int64_base_atomics_supported() const
1073 {
1074     return _compile_context.int64_base_atomics_supported();
1075 }
1076 
get_program(const std::string & program_name) const1077 std::pair<std::string, bool> CLKernelLibrary::get_program(const std::string &program_name) const
1078 {
1079 #ifdef EMBEDDED_KERNELS
1080     const auto program_source_it = _program_source_map.find(program_name);
1081 
1082     if(program_source_it == _program_source_map.end())
1083     {
1084         ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
1085     }
1086 
1087     return std::make_pair(program_source_it->second, false);
1088 #else  /* EMBEDDED_KERNELS */
1089     // Check for binary
1090     std::string source_name = _kernel_path + program_name;
1091     std::string binary_name = source_name + "bin";
1092     std::string program_source{};
1093     bool        is_binary = false;
1094 
1095     if(std::ifstream(binary_name).is_open())
1096     {
1097         program_source = read_file(binary_name, true);
1098         is_binary      = true;
1099     }
1100     else if(std::ifstream(source_name).is_open())
1101     {
1102         program_source = read_file(source_name, false);
1103     }
1104     else
1105     {
1106         ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
1107     }
1108 
1109     return std::make_pair(program_source, is_binary);
1110 #endif /* EMBEDDED_KERNELS */
1111 }
1112 
// Returns the value of _compile_context.max_local_workgroup_size() for the
// given kernel, unchanged.
max_local_workgroup_size(const cl::Kernel & kernel) const1113 size_t CLKernelLibrary::max_local_workgroup_size(const cl::Kernel &kernel) const
1114 {
1115     return _compile_context.max_local_workgroup_size(kernel);
1116 }
1117 
// Returns the cl::NDRange produced by _compile_context.default_ndrange().
default_ndrange() const1118 cl::NDRange CLKernelLibrary::default_ndrange() const
1119 {
1120     return _compile_context.default_ndrange();
1121 }
1122 
// Returns the string produced by _compile_context.get_device_version().
get_device_version()1123 std::string CLKernelLibrary::get_device_version()
1124 {
1125     return _compile_context.get_device_version();
1126 }
1127 
// Returns the cl_uint produced by _compile_context.get_num_compute_units().
get_num_compute_units()1128 cl_uint CLKernelLibrary::get_num_compute_units()
1129 {
1130     return _compile_context.get_num_compute_units();
1131 }
1132 
// Gives callers direct, mutable access to the compile context owned by this
// library (returns _compile_context by non-const reference).
get_compile_context()1133 CLCompileContext &CLKernelLibrary::get_compile_context()
1134 {
1135     return _compile_context;
1136 }
1137