1 /*
2 * Copyright (c) 2017-2020 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "arm_compute/core/Helpers.h"
25 #include "arm_compute/core/Types.h"
26 #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
27 #include "arm_compute/runtime/Tensor.h"
28 #include "arm_compute/runtime/TensorAllocator.h"
29 #include "tests/NEON/Accessor.h"
30 #include "tests/PaddingCalculator.h"
31 #include "tests/datasets/ShapeDatasets.h"
32 #include "tests/framework/Asserts.h"
33 #include "tests/framework/Macros.h"
34 #include "tests/framework/datasets/Datasets.h"
35 #include "tests/validation/Validation.h"
36 #include "tests/validation/fixtures/DirectConvolutionLayerFixture.h"
37
38 namespace arm_compute
39 {
40 namespace test
41 {
42 namespace validation
43 {
namespace
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */
const AbsoluteTolerance<float> abs_tolerance_f16(0.2f); /**< Absolute tolerance for FP16 types */
constexpr float tolerance_num = 0.07f; /**< Tolerance number for the FP16 implementation */
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */

/** Direct convolution padding data set for FP32: pad/kernel-size combinations for 3x3 and 5x5 kernels */
const auto data_pad_f32 = concat(concat(combine(framework::dataset::make("PadX", { 0, 1 }),
combine(framework::dataset::make("PadY", { 0, 1 }),
framework::dataset::make("KernelSize", 3))),
combine(framework::dataset::make("PadX", { 0, 2 }),
combine(framework::dataset::make("PadY", { 0, 2 }),
framework::dataset::make("KernelSize", 3)))),
combine(framework::dataset::make("PadX", { 0, 3 }),
combine(framework::dataset::make("PadY", { 0, 3 }),
framework::dataset::make("KernelSize", 5))));

/** Direct convolution padding data set for FP16: pad/kernel-size combinations for 3x3 and 1x1 kernels */
const auto data_pad_f16 = concat(combine(framework::dataset::make("PadX", { 0, 1 }),
combine(framework::dataset::make("PadY", { 0, 1 }),
framework::dataset::make("KernelSize", 3))),
combine(framework::dataset::make("PadX", { 0 }),
combine(framework::dataset::make("PadY", { 0 }),
framework::dataset::make("KernelSize", 1))));

/** Full FP32 configuration set: small shapes x strides {1,2,3} x FP32 padding set */
const auto data_f32 = combine(datasets::SmallDirectConvolutionShapes(),
combine(framework::dataset::make("StrideX", { 1, 2, 3 }),
combine(framework::dataset::make("StrideY", { 1, 2, 3 }),
data_pad_f32)));

/** Full FP16 configuration set: small shapes x strides {1,2,3} x FP16 padding set */
const auto data_f16 = combine(datasets::SmallDirectConvolutionShapes(),
combine(framework::dataset::make("StrideX", { 1, 2, 3 }),
combine(framework::dataset::make("StrideY", { 1, 2, 3 }),
data_pad_f16)));

/** Reduced configuration set used for precommit: unit stride, pad 1, 3x3 kernel */
const auto data_prec = combine(datasets::SmallDirectConvolutionShapes(),
combine(framework::dataset::make("StrideX", { 1 }),
combine(framework::dataset::make("StrideY", { 1 }),
combine(framework::dataset::make("PadX", { 1 }),
combine(framework::dataset::make("PadY", { 1 }),
framework::dataset::make("KernelSize", 3))))));

/** 9x9 kernel configuration set: unit stride with asymmetric pad choices */
const auto data9x9 = combine(datasets::SmallDirectConvolutionShapes(),
combine(framework::dataset::make("StrideX", { 1 }),
combine(framework::dataset::make("StrideY", { 1 }),
combine(framework::dataset::make("PadX", { 0, 2 }),
combine(framework::dataset::make("PadY", { 0, 3 }),
framework::dataset::make("KernelSize", 9))))));

// Nightly runs sweep multiple kernel counts; precommit keeps a single choice to stay fast
const auto data_f32_nightly = combine(data_f32, framework::dataset::make("NumKernels", { 1, 4 }));
const auto data_f16_nightly = combine(data_f16, framework::dataset::make("NumKernels", { 1, 4 }));

const auto data_precommit = combine(data_prec, framework::dataset::make("NumKernels", { 1 }));
const auto data_precommit9x9 = combine(data9x9, framework::dataset::make("NumKernels", { 4 }));

/* The following test is from a real use-case that made DirectConvolution
 * overflow in terms of its tensor indexing. This test case uses
 * a separate tolerance for the following reasons:
 * - It has been shown that it generally requires a larger absolute tolerance
 *   for large numbers, or a larger relative tolerance for small numbers.
 * - Given the first reason, and since this is mainly testing index overflow,
 *   a value with a margin is used to avoid unintended test failures
 *   during nightly runs.
 */
constexpr AbsoluteTolerance<float> usecase_tolerance_fp32(0.05f);

/** Large-tensor (3x800x800) configuration that previously triggered tensor-index overflow */
const auto data_nightly_usecase = combine(framework::dataset::make("InputShape", { TensorShape{ 3U, 800U, 800U } }),
combine(framework::dataset::make("StrideX", { 1 }),
combine(framework::dataset::make("StrideY", { 1 }),
combine(framework::dataset::make("PadX", { 4 }),
combine(framework::dataset::make("PadY", { 4 }),
combine(framework::dataset::make("KernelSize", 9),
framework::dataset::make("NumKernels", { 16 })))))));

/** Activation function Dataset: identity (none) plus a bounded ReLU */
const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
{
ActivationLayerInfo(),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
});
} // namespace
128
129 TEST_SUITE(NEON)
TEST_SUITE(DirectConvolutionLayer)130 TEST_SUITE(DirectConvolutionLayer)
131
// *INDENT-OFF*
// clang-format off
/** Negative tests for NEDirectConvolutionLayer::validate().
 *
 * The zipped datasets form parallel rows: each row pairs one invalid
 * (input, weights, biases, output, conv, activation) configuration with the
 * expected result. Every row here is expected to be rejected (Expected == false);
 * the reason each row is invalid is noted next to its InputInfo entry.
 */
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching input feature maps
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported kernel width
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non-rectangular weights dimensions
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights dimensions
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid stride
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
}),
framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16),
TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(9U, 9U, 2U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
})),
framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
TensorInfo(TensorShape(3U), 1, DataType::F32),
TensorInfo(TensorShape(4U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(4U), 1, DataType::F32),
})),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32),
})),
framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(3, 3, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
})),
framework::dataset::make("ActivationInfo",
{
ActivationLayerInfo(),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
})),
framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false })),
input_info, weights_info, biases_info, output_info, conv_info, act_info, expected)
{
// validate() returns a Status; converting it to bool yields true only on success.
// Cloned infos are marked non-resizable so validate() checks the exact shapes given.
bool is_valid = bool(NEDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info));
ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
}
// clang-format on
// *INDENT-ON*
198
199 DATA_TEST_CASE(NoPaddingNHWCKernel, framework::DatasetMode::ALL, combine(combine(combine(data_precommit,
200 framework::dataset::make("DataType", DataType::F32)),
201 ActivationFunctionsDataset),
202 framework::dataset::make("DataLayout", { DataLayout::NHWC })),
203
204 shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels, data_type, act_info, data_layout)
205 {
206 TensorShape input_shape = TensorShape(shape);
207 TensorShape weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels);
208 const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR);
209
210 TensorInfo input_info = TensorInfo(input_shape, 1, data_type);
211 TensorInfo weights_info = TensorInfo(weights_shape, 1, data_type);
212
213 TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, info);
214
215 if(data_layout == DataLayout::NHWC)
216 {
217 permute(input_shape, PermutationVector(2U, 0U, 1U));
218 permute(weights_shape, PermutationVector(2U, 0U, 1U));
219 permute(output_shape, PermutationVector(2U, 0U, 1U));
220 }
221
222 // Create tensors
223 Tensor src = create_tensor<Tensor>(input_shape, data_type, 1, QuantizationInfo(), data_layout);
224 Tensor weights = create_tensor<Tensor>(weights_shape, data_type, 1, QuantizationInfo(), data_layout);
225 Tensor dst = create_tensor<Tensor>(output_shape, data_type, 1, QuantizationInfo(), data_layout);
226
227 // Create and configure function
228 NEDirectConvolutionLayer conv;
229 conv.configure(&src, &weights, nullptr, &dst, info, act_info);
230
231 validate(src.info()->padding(), PaddingSize(0, 0, 0, 0));
232 validate(weights.info()->padding(), PaddingSize(0, 0, 0, 0));
233 validate(dst.info()->padding(), PaddingSize(0, 0, 0, 0));
234 }
235
/** Convenience alias instantiating the direct-convolution validation fixture for Neon tensors. */
template <typename T>
using NEDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<Tensor, Accessor, NEDirectConvolutionLayer, T>;
238
239 TEST_SUITE(Float)
240 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)241 TEST_SUITE(FP16)
242 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType",
243 DataType::F16)),
244 ActivationFunctionsDataset),
245 framework::dataset::make("DataLayout", DataLayout::NCHW)))
246 {
247 // Validate output
248 validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
249 }
250 FIXTURE_DATA_TEST_CASE(RunLarge, NEDirectConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_f16_nightly, framework::dataset::make("DataType", DataType::F16)),
251 ActivationFunctionsDataset),
252 framework::dataset::make("DataLayout", DataLayout::NCHW)))
253 {
254 // Validate output
255 validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
256 }
257 TEST_SUITE_END() // FP16
258 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
259
TEST_SUITE(FP32)260 TEST_SUITE(FP32)
261 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType",
262 DataType::F32)),
263 ActivationFunctionsDataset),
264 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
265 {
266 // Validate output
267 validate(Accessor(_target), _reference, tolerance_fp32);
268 }
269 FIXTURE_DATA_TEST_CASE(RunSmall9x9, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit9x9, framework::dataset::make("DataType",
270 DataType::F32)),
271 ActivationFunctionsDataset),
272 framework::dataset::make("DataLayout", { DataLayout::NHWC })))
273 {
274 // Validate output
275 validate(Accessor(_target), _reference, tolerance_fp32);
276 }
277 FIXTURE_DATA_TEST_CASE(RunLarge, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_f32_nightly, framework::dataset::make("DataType",
278 DataType::F32)),
279 ActivationFunctionsDataset),
280 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
281 {
282 // Validate output
283 validate(Accessor(_target), _reference, tolerance_fp32);
284 }
285 FIXTURE_DATA_TEST_CASE(RunLargeUsecase, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly_usecase, framework::dataset::make("DataType",
286 DataType::F32)),
287 framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })),
288 framework::dataset::make("DataLayout", { DataLayout::NHWC })))
289 {
290 // Validate output
291 validate(Accessor(_target), _reference, usecase_tolerance_fp32);
292 }
293 TEST_SUITE_END() // FP32
294 TEST_SUITE_END() // Float
295 TEST_SUITE_END() // DirectConvolutionLayer
296 TEST_SUITE_END() // NEON
297 } // namespace validation
298 } // namespace test
299 } // namespace arm_compute
300