1 //
2 // Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "LayersFwd.hpp"
7
8 #include <Network.hpp>
9 #include <ResolveType.hpp>
10 #include <armnn/INetwork.hpp>
11 #include <TestUtils.hpp>
12
13 #include <doctest/doctest.h>
14
15 using namespace armnn;
16
17 TEST_SUITE("Optimizer")
18 {
19 namespace
20 {
21
22 class Conv2dTest
23 {
24 public:
25 using ConvDescriptorType = armnn::Convolution2dDescriptor;
26 using ConvLayerType = armnn::Convolution2dLayer;
27
AddConvolution(INetwork * network,const Convolution2dDescriptor & descriptor,const char * name)28 static IConnectableLayer *AddConvolution(INetwork *network,
29 const Convolution2dDescriptor &descriptor,
30 const char *name)
31 {
32 return network->AddConvolution2dLayer(descriptor, name);
33 }
34
AddConstantLayers(INetwork * network,const Convolution2dDescriptor & descriptor,const ConstTensor & weights,const Optional<ConstTensor> & biases)35 static std::vector<IConnectableLayer*> AddConstantLayers(INetwork *network,
36 const Convolution2dDescriptor &descriptor,
37 const ConstTensor &weights,
38 const Optional<ConstTensor> &biases)
39 {
40 auto weightsLayer = network->AddConstantLayer(weights, "Weights");
41 weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
42 std::vector<IConnectableLayer*> layers = {weightsLayer};
43
44 if (descriptor.m_BiasEnabled)
45 {
46 auto biasLayer = network->AddConstantLayer(biases.value(), "Bias");
47 biasLayer->GetOutputSlot(0).SetTensorInfo(biases.value().GetInfo());
48 layers.emplace_back(biasLayer);
49 }
50
51 return layers;
52 }
53 };
54
55 class DepthwiseConv2dTest
56 {
57 public:
58 using ConvDescriptorType = armnn::DepthwiseConvolution2dDescriptor;
59 using ConvLayerType = armnn::DepthwiseConvolution2dLayer;
60
AddConvolution(INetwork * network,const DepthwiseConvolution2dDescriptor & descriptor,const char * name)61 static IConnectableLayer* AddConvolution(INetwork* network,
62 const DepthwiseConvolution2dDescriptor& descriptor,
63 const char* name)
64 {
65
66 return network->AddDepthwiseConvolution2dLayer(descriptor, name);
67 }
68
AddConstantLayers(INetwork * network,const DepthwiseConvolution2dDescriptor & descriptor,const ConstTensor & weights,const Optional<ConstTensor> & biases)69 static std::vector<IConnectableLayer*> AddConstantLayers(INetwork *network,
70 const DepthwiseConvolution2dDescriptor &descriptor,
71 const ConstTensor &weights,
72 const Optional<ConstTensor> &biases)
73 {
74 auto weightsLayer = network->AddConstantLayer(weights, "Weights");
75 weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
76 std::vector<IConnectableLayer*> layers = {weightsLayer};
77
78 if (descriptor.m_BiasEnabled)
79 {
80 auto biasLayer = network->AddConstantLayer(biases.value(), "Bias");
81 biasLayer->GetOutputSlot(0).SetTensorInfo(biases.value().GetInfo());
82 layers.emplace_back(biasLayer);
83 }
84
85 return layers;
86 }
87 };
88
// Builds a size-element arithmetic sequence: initial, initial + increment,
// initial + 2*increment, ... Each value is computed in float and converted to T
// (e.g. float or Half) by explicit construction.
//
// Fixes two defects in the previous version: an unused local vector was
// allocated on every call, and a `size > 1` guard left a single-element result
// value-initialized (zero) instead of containing `initial`.
template<typename T>
std::vector<T> GetVector(unsigned int size, float initial, float increment)
{
    std::vector<T> vector(size);
    for (unsigned int i = 0; i < size; ++i)
    {
        vector[i] = T(initial + (increment * static_cast<float>(i)));
    }
    return vector;
}
104
105 } // namespace
106
// Constructs the test network:
//   Input -> Conv2d/DepthwiseConv2d (constant weights, bias disabled) -> BatchNormalization -> Output
// When preventFusing is true, the convolution output additionally feeds a second
// output layer, so the conv result is still needed on its own and the optimizer
// cannot fuse the batch norm into the convolution.
template <typename Conv2dTest,
          armnn::DataType ArmnnType,
          typename ConvDescriptorType = typename Conv2dTest::ConvDescriptorType,
          typename T = armnn::ResolveType<ArmnnType>>
INetworkPtr CreateNetwork(bool depthwise, bool preventFusing)
{
    // Define layers information
    ConvDescriptorType convolution2dDescriptor;
    convolution2dDescriptor.m_BiasEnabled = false;
    convolution2dDescriptor.m_DataLayout = DataLayout::NHWC;
    convolution2dDescriptor.m_StrideX = 1;
    convolution2dDescriptor.m_StrideY = 1;
    BatchNormalizationDescriptor batchNormDescriptor;
    batchNormDescriptor.m_DataLayout = DataLayout::NHWC;

    const unsigned int inputDimensionSizes[] = {1, 4, 4, 3};  // NHWCin
    unsigned int weightsDimensionSizes[] = {4, 2, 2, 3};      // CoutHWCin
    unsigned int outputDimensionSizes[] = {1, 3, 3, 4};       // NHWCout

    if (depthwise)
    {
        // Depthwise weights use the [1, H, W, Cout] layout; Cout = Cin * depthMultiplier (3 * 4 = 12).
        weightsDimensionSizes[0] = 1;
        weightsDimensionSizes[1] = 2;
        weightsDimensionSizes[2] = 2;
        weightsDimensionSizes[3] = 12;
        outputDimensionSizes[3] = weightsDimensionSizes[3];
    }
    const unsigned int outputChannelSize[] = {outputDimensionSizes[3]};  // Cout

    TensorInfo inputInfo(4, inputDimensionSizes, ArmnnType);
    TensorInfo outputInfo(4, outputDimensionSizes, ArmnnType);

    // 48 weight values; converted element-wise to T (float or Half) below.
    std::vector<int> weightsIntVector = { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
                                         11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                         21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                         31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
    std::vector<T> weightsVector(begin(weightsIntVector), end(weightsIntVector));
    TensorInfo weightsInfo(4, weightsDimensionSizes, ArmnnType, 0.0f, 0, true);
    ConstTensor weights(weightsInfo, weightsVector);

    // Per-channel batch-norm parameters (one value per output channel).
    std::vector<T> betaVector     = GetVector<T>(outputDimensionSizes[3], 0.0f, 0.2f);
    std::vector<T> gammaVector    = GetVector<T>(outputDimensionSizes[3], 0.5f, 0.1f);
    std::vector<T> meanVector     = GetVector<T>(outputDimensionSizes[3], 0.1f, 0.1f);
    std::vector<T> varianceVector = GetVector<T>(outputDimensionSizes[3], 1.0f, 0.1f);

    ConstTensor beta    (TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), betaVector);
    ConstTensor gamma   (TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), gammaVector);
    ConstTensor mean    (TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), meanVector);
    ConstTensor variance(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), varianceVector);

    // Create a network
    INetworkPtr network = INetwork::Create();

    IConnectableLayer* inputLayer = network->AddInputLayer(0);

    // Conv2dTest selects regular vs depthwise convolution at compile time.
    IConnectableLayer* convLayer = Conv2dTest::AddConvolution(network.get(),
                                                              convolution2dDescriptor,
                                                              "convolution");

    IConnectableLayer* batchNormLayer = network->AddBatchNormalizationLayer(batchNormDescriptor,
                                                                            mean,
                                                                            variance,
                                                                            beta,
                                                                            gamma,
                                                                            "batchNorm");

    IConnectableLayer* outputLayer = network->AddOutputLayer(0);
    IConnectableLayer* output2Layer = nullptr;

    if (preventFusing)
    {
        output2Layer = network->AddOutputLayer(1);
    }

    // Weights (bias disabled, so only one constant layer is returned here).
    std::vector<IConnectableLayer*> constantLayers = Conv2dTest::AddConstantLayers(network.get(),
                                                                                   convolution2dDescriptor,
                                                                                   weights,
                                                                                   Optional<ConstTensor>());

    // Connect constant layers to receiverLayer: slot 0 is the data input, so
    // weights go to slot 1 (and bias to slot 2 if present).
    for (unsigned int i = 0; i < constantLayers.size(); ++i)
    {
        constantLayers[i]->GetOutputSlot(0).Connect(convLayer->GetInputSlot(i + 1));
    }

    // Set layer information
    inputLayer    ->GetOutputSlot(0).SetTensorInfo(inputInfo);
    convLayer     ->GetOutputSlot(0).SetTensorInfo(outputInfo);
    batchNormLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // Connect layers
    inputLayer    ->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
    convLayer     ->GetOutputSlot(0).Connect(batchNormLayer->GetInputSlot(0));
    batchNormLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    if (preventFusing)
    {
        // Second consumer of the conv output: blocks the fuse optimization.
        convLayer ->GetOutputSlot(0).Connect(output2Layer->GetInputSlot(0));
    }

    return network;
}
210
211 template <typename Conv2dTest,
212 armnn::DataType ArmnnType,
213 typename ConvDescriptorType = typename Conv2dTest::ConvDescriptorType,
214 typename ConvLayerType = typename Conv2dTest::ConvLayerType,
215 typename T = armnn::ResolveType<ArmnnType>>
FuseBatchNormIntoConvTest(bool depthwise,float tolerance,armnn::Compute backendId)216 void FuseBatchNormIntoConvTest(bool depthwise, float tolerance, armnn::Compute backendId)
217 {
218 // FIRST NETWORK: Fused
219 // Construct ArmNN network
220 INetworkPtr networkFused = CreateNetwork<Conv2dTest, ArmnnType>(depthwise, false);
221
222 // Create ArmNN runtime
223 IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
224
225 // Optimise ArmNN network
226 IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());
227
228 Graph& graphFused = GetGraphForTesting(optNetFused.get());
229
230 auto checkFusedConv2d = [ ](const armnn::Layer* const layer) -> bool
231 {
232 return IsLayerOfType<ConvLayerType>(layer) &&
233 (layer->GetNameStr() == "fused-batchNorm-into-convolution");
234 };
235 auto checkConstant = [ ](const armnn::Layer* const layer) -> bool
236 {
237 const ConstantLayer* constLayer = PolymorphicDowncast<const ConstantLayer*>(layer);
238 auto tensor = ConstTensor(constLayer->m_LayerOutput->GetTensorInfo(),
239 constLayer->m_LayerOutput->Map(true));
240 const auto* buffer = static_cast<const T*>(tensor.GetMemoryArea());
241 std::vector<T> vector(buffer, buffer + tensor.GetNumElements());
242 return IsLayerOfType<ConstantLayer>(layer);
243 };
244 CHECK(5 == graphFused.GetNumLayers());
245 CHECK(CheckSequence(graphFused.cbegin(),
246 graphFused.cend(),
247 &IsLayerOfType<InputLayer>,
248 checkConstant,
249 checkConstant,
250 checkFusedConv2d,
251 &IsLayerOfType<OutputLayer>));
252
253 // Load network into runtime
254 NetworkId networkIdentifier;
255 CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);
256
257 //Creates structures for inputs and outputs.
258 std::vector<T> inputDataFused = GetVector<T>(48, 1.0f, 0.1f);
259
260 std::vector<T> outputDataFused(36);
261
262 if (depthwise)
263 {
264 outputDataFused.resize(108);
265 }
266
267 TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
268 inputTensorInfo.SetConstant(true);
269 InputTensors inputTensorsFused {
270 {0, ConstTensor(inputTensorInfo, inputDataFused.data())}};
271 OutputTensors outputTensorsFused{
272 {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
273
274 // Execute network
275 run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
276
277 // SECOND NETWORK: NotFused
278 // Construct ArmNN network
279 INetworkPtr networkNotFused = CreateNetwork<Conv2dTest, ArmnnType>(depthwise, true);
280
281 // Create ArmNN runtime
282 IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options
283
284 // Optimise ArmNN network
285 IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, { backendId }, runNotFused->GetDeviceSpec());
286
287 Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get());
288
289 CHECK(6 == graphNotFused.GetNumLayers());
290 CHECK(CheckSequence(graphNotFused.cbegin(),
291 graphNotFused.cend(),
292 &IsLayerOfType<armnn::InputLayer>,
293 &IsLayerOfType<armnn::ConstantLayer>,
294 &IsLayerOfType<ConvLayerType>,
295 &IsLayerOfType<armnn::BatchNormalizationLayer>,
296 &IsLayerOfType<armnn::OutputLayer>,
297 &IsLayerOfType<armnn::OutputLayer>));
298
299 // Load network into runtime
300 NetworkId networkIdentifierNotFused;
301 CHECK(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);
302
303 //Creates structures for inputs and outputs.
304 std::vector<T> inputDataNotFused = GetVector<T>(48, 1.0f, 0.1f);
305
306 std::vector<T> outputDataNotFused(36);
307 std::vector<T> outputData2NotFused(36);
308
309 if (depthwise)
310 {
311 outputDataNotFused.resize(108);
312 outputData2NotFused.resize(108);
313 }
314
315 TensorInfo inputTensorInfo2 = runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0);
316 inputTensorInfo2.SetConstant(true);
317 InputTensors inputTensorsNotFused{
318 { 0, ConstTensor(inputTensorInfo2, inputDataNotFused.data()) } };
319 OutputTensors outputTensorsNotFused{
320 { 0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data()) },
321 { 1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data()) } };
322
323 // Execute network
324 runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused);
325
326 // Check the output of the fused-convolution matches with the output of the batchNormm in the "NotFused" network
327 auto epsilon = T(tolerance);
328 for (unsigned int n = 0; n < outputDataFused.size(); ++n)
329 {
330 CHECK_EQ(outputDataFused[n], doctest::Approx(outputDataNotFused[n]).epsilon(epsilon));
331 }
332 }
333
334 // This unit test needs the reference backend, it's not available if the reference backend is not built
335 #if defined(ARMNNREF_ENABLED)
// Regular convolution, Float32: tight tolerance since full precision is kept.
TEST_CASE("FuseBatchNormIntoConv2DFloat32Test")
{
    FuseBatchNormIntoConvTest<Conv2dTest, DataType::Float32>(false, 0.0001f, armnn::Compute::CpuRef);
}
340
// Regular convolution, Float16: looser tolerance for half-precision rounding.
TEST_CASE("FuseBatchNormIntoConv2DFloat16Test")
{
    FuseBatchNormIntoConvTest<Conv2dTest, DataType::Float16>(false, 0.1f, armnn::Compute::CpuRef);
}
345
// Depthwise convolution, Float32: tight tolerance since full precision is kept.
TEST_CASE("FuseBatchNormIntoDepthwiseConv2DFloat32Test")
{
    FuseBatchNormIntoConvTest<DepthwiseConv2dTest, DataType::Float32>(true, 0.0001f,armnn::Compute::CpuRef);
}
350
// Depthwise convolution, Float16: loosest tolerance (more channels accumulate
// more half-precision rounding error).
TEST_CASE("FuseBatchNormIntoDepthwiseConv2DFloat16Test")
{
    FuseBatchNormIntoConvTest<DepthwiseConv2dTest, DataType::Float16>(true, 0.2f,armnn::Compute::CpuRef);
}
355 #endif
356
357 }
358