1 /*
2 * Copyright (c) 2017-2022 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #include "src/cpu/kernels/CpuWinogradConv2dKernel.h"
26
27 namespace arm_compute
28 {
29 namespace cpu
30 {
CpuWinogradConv2dTransformInputKernel(arm_conv::winograd::WinogradImpl & w_impl,arm_conv::ConvolutionArgs & _c_args,uint32_t nthreads)31 CpuWinogradConv2dTransformInputKernel::CpuWinogradConv2dTransformInputKernel(arm_conv::winograd::WinogradImpl &w_impl, arm_conv::ConvolutionArgs &_c_args, uint32_t nthreads)
32 : _winograd_impl{ w_impl }, _conv_args{ _c_args }, _nthreads{ nthreads }
33 {
34 }
35
run_op(ITensorPack & tensors,const Window & window,const ThreadInfo & info)36 void CpuWinogradConv2dTransformInputKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
37 {
38 ARM_COMPUTE_UNUSED(window);
39 const ITensor *input_nhwc = tensors.get_const_tensor(TensorType::ACL_SRC);
40 const ITensor *winograd_input_transform = tensors.get_const_tensor(TensorType::ACL_DST);
41 const ITensor *workspace = tensors.get_const_tensor(TensorType::ACL_INT);
42
43 const unsigned int width_idx = 1;
44 const unsigned int height_idx = 2;
45 const unsigned int batch_idx = 3;
46 int element_size_in_bytes = input_nhwc->info()->element_size();
47 const auto src_strides = input_nhwc->info()->strides_in_bytes();
48
49 const size_t input_row_stride = src_strides[height_idx] / element_size_in_bytes;
50 const size_t input_col_stride = src_strides[width_idx] / element_size_in_bytes;
51 const size_t input_batch_stride = src_strides[batch_idx] / element_size_in_bytes;
52 const auto input_nhwc_ptr = reinterpret_cast<const void *>(input_nhwc->buffer() + input_nhwc->info()->offset_first_element_in_bytes());
53 auto win_transf_ptr = reinterpret_cast<void *>(winograd_input_transform->buffer() + winograd_input_transform->info()->offset_first_element_in_bytes());
54
55 _winograd_impl.input_transform->execute(
56 _conv_args,
57 input_nhwc_ptr,
58 input_batch_stride,
59 input_row_stride,
60 input_col_stride,
61 win_transf_ptr,
62 _winograd_impl.winograd_spec,
63 workspace->buffer(),
64 info.thread_id,
65 _nthreads);
66 }
67
CpuWinogradConv2dTransformOutputKernel(arm_conv::winograd::WinogradImpl & w_impl,arm_conv::ConvolutionArgs & _c_args,uint32_t nthreads)68 CpuWinogradConv2dTransformOutputKernel::CpuWinogradConv2dTransformOutputKernel(arm_conv::winograd::WinogradImpl &w_impl, arm_conv::ConvolutionArgs &_c_args, uint32_t nthreads)
69 : _winograd_impl{ w_impl }, _conv_args{ _c_args }, _nthreads{ nthreads }
70 {
71 }
72
73 // Inherited methods overridden:
run_op(ITensorPack & tensors,const Window & window,const ThreadInfo & info)74 void CpuWinogradConv2dTransformOutputKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
75 {
76 ARM_COMPUTE_UNUSED(window);
77 const ITensor *dst_nhwc = tensors.get_const_tensor(TensorType::ACL_DST);
78 const ITensor *winograd_output_transform = tensors.get_const_tensor(TensorType::ACL_SRC_0);
79 const ITensor *biases = tensors.get_const_tensor(TensorType::ACL_SRC_1);
80 const ITensor *workspace = tensors.get_tensor(TensorType::ACL_INT);
81
82 const unsigned int width_idx = 1;
83 const unsigned int height_idx = 2;
84 const unsigned int batch_idx = 3;
85 const int element_size_in_bytes = dst_nhwc->info()->element_size();
86 const auto dst_strides = dst_nhwc->info()->strides_in_bytes();
87
88 const size_t out_row_stride = dst_strides[height_idx] / element_size_in_bytes;
89 const size_t out_col_stride = dst_strides[width_idx] / element_size_in_bytes;
90 const size_t out_batch_stride = dst_strides[batch_idx] / element_size_in_bytes;
91 const auto wout_transf_ptr = reinterpret_cast<const void *>(winograd_output_transform->buffer() + winograd_output_transform->info()->offset_first_element_in_bytes());
92 auto dst_nhwc_ptr = reinterpret_cast<void *>(dst_nhwc->buffer() + dst_nhwc->info()->offset_first_element_in_bytes());
93 void *biases_data_ptr = nullptr;
94 if(biases != nullptr)
95 {
96 biases_data_ptr = reinterpret_cast<void *>(biases->buffer() + biases->info()->offset_first_element_in_bytes());
97 }
98
99 // Output transform
100 _winograd_impl.output_transform->execute(
101 _conv_args,
102 wout_transf_ptr,
103 _winograd_impl.winograd_spec,
104 biases_data_ptr,
105 dst_nhwc_ptr,
106 out_batch_stride,
107 out_row_stride,
108 out_col_stride,
109 workspace->buffer(),
110 info.thread_id,
111 _nthreads);
112 }
113
114 } // namespace cpu
115 } // namespace arm_compute