• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/runtime/NEON/functions/NEConvolution.h"
25 
26 #include "arm_compute/core/Error.h"
27 #include "arm_compute/core/ITensor.h"
28 #include "arm_compute/core/PixelValue.h"
29 #include "arm_compute/core/TensorInfo.h"
30 #include "arm_compute/core/Utils.h"
31 #include "arm_compute/core/Validate.h"
32 #include "arm_compute/runtime/NEON/NEScheduler.h"
33 #include "arm_compute/runtime/TensorAllocator.h"
34 #include "src/core/NEON/kernels/NEConvolutionKernel.h"
35 #include "src/core/NEON/kernels/NEConvolutionKernel.h"
36 #include "src/core/NEON/kernels/NEFillBorderKernel.h"
37 #include "support/MemorySupport.h"
38 
39 #include <array>
40 #include <utility>
41 
42 namespace arm_compute
43 {
44 NEConvolution3x3::~NEConvolution3x3() = default;
45 
configure(ITensor * input,ITensor * output,const int16_t * conv,uint32_t scale,BorderMode border_mode,uint8_t constant_border_value)46 void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
47 {
48     auto k = arm_compute::support::cpp14::make_unique<NEConvolution3x3Kernel>();
49     k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
50     _kernel = std::move(k);
51 
52     auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
53     b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
54     _border_handler = std::move(b);
55 }
56 
57 template <unsigned int matrix_size>
58 NEConvolutionSquare<matrix_size>::~NEConvolutionSquare() = default;
59 
60 template <unsigned int matrix_size>
NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)61 NEConvolutionSquare<matrix_size>::NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
62     : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
63 {
64 }
65 
66 template <unsigned int matrix_size>
configure(ITensor * input,ITensor * output,const int16_t * conv,uint32_t scale,BorderMode border_mode,uint8_t constant_border_value)67 void NEConvolutionSquare<matrix_size>::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
68                                                  uint8_t constant_border_value)
69 {
70     ARM_COMPUTE_ERROR_ON(conv == nullptr);
71     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
72     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
73 
74     std::array<int16_t, matrix_size> conv_col{ { 0 } };
75     std::array<int16_t, matrix_size> conv_row{ { 0 } };
76 
77     _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size);
78 
79     auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
80     if(_is_separable)
81     {
82         DataType intermediate_type = DataType::UNKNOWN;
83         std::tie(std::ignore, intermediate_type) = data_type_for_convolution(conv_col.data(), conv_row.data(), matrix_size);
84 
85         _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, intermediate_type));
86 
87         // Manage intermediate buffers
88         _memory_group.manage(&_tmp);
89 
90         // Calculate scale
91         if(scale == 0)
92         {
93             scale = calculate_matrix_scale(conv, matrix_size);
94         }
95 
96         _kernel_hor  = arm_compute::support::cpp14::make_unique<NESeparableConvolutionHorKernel<matrix_size>>();
97         _kernel_vert = arm_compute::support::cpp14::make_unique<NESeparableConvolutionVertKernel<matrix_size>>();
98 
99         _kernel_hor->configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
100         _kernel_vert->configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
101 
102         _tmp.allocator()->allocate();
103 
104         b->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
105     }
106     else
107     {
108         _kernel = arm_compute::support::cpp14::make_unique<NEConvolutionKernel<matrix_size>>();
109         _kernel->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
110         b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
111     }
112     _border_handler = std::move(b);
113 }
114 
115 template <unsigned int matrix_size>
run()116 void                   NEConvolutionSquare<matrix_size>::run()
117 {
118     NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
119 
120     if(_is_separable)
121     {
122         MemoryGroupResourceScope scope_mg(_memory_group);
123 
124         NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY);
125         NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY);
126     }
127     else
128     {
129         NEScheduler::get().schedule(_kernel.get(), Window::DimY);
130     }
131 }
132 
133 template class arm_compute::NEConvolutionSquare<5>;
134 template class arm_compute::NEConvolutionSquare<7>;
135 template class arm_compute::NEConvolutionSquare<9>;
136 
137 NEConvolutionRectangle::~NEConvolutionRectangle() = default;
138 
configure(ITensor * input,ITensor * output,const int16_t * conv,uint32_t rows,uint32_t cols,uint32_t scale,BorderMode border_mode,uint8_t constant_border_value)139 void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
140 {
141     auto k = arm_compute::support::cpp14::make_unique<NEConvolutionRectangleKernel>();
142     k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED);
143     _kernel = std::move(k);
144 
145     auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
146     b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
147     _border_handler = std::move(b);
148 }
149 } // namespace arm_compute
150