1 //
2 // Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "LayersFwd.hpp"
7
8 #include <ResolveType.hpp>
9 #include <armnn/INetwork.hpp>
10 #include <GraphUtils.hpp>
11 #include <TestUtils.hpp>
12
13 #include <doctest/doctest.h>
14
15 #include <armnnUtils/QuantizeHelper.hpp>
16 #include <string>
17
18 using namespace armnn;
19
20 namespace
21 {
22
// Returns a vector of 'size' elements of type T where element i holds
// initial + i * increment (computed in float, then converted to T).
//
// Fixes two defects in the previous version:
//  * an unused std::vector<float> was allocated on every call;
//  * the loop was guarded by 'if (size > 1)', so a size-1 request returned
//    a value-initialized T{} instead of 'initial'. The loop is correct for
//    every size (including 0 and 1), so no guard is needed.
template<typename T>
std::vector<T> GetVector(unsigned int size, float initial, float increment)
{
    std::vector<T> vector(size);
    for (unsigned int i = 0; i < size; ++i)
    {
        vector[i] = T(initial + (increment * static_cast<float>(i)));
    }
    return vector;
}
38
39 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
40 struct Convolution2dTest
41 {
42 using LayerType = Convolution2dLayer;
43 static const bool isElementWise = false;
44 static const bool isConstTensorAsInputSupported = true;
45
GetInputShape__anond1a1e5130111::Convolution2dTest46 static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
GetOutputShape__anond1a1e5130111::Convolution2dTest47 static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 4}); } // NHWCout
GetWeightsShape__anond1a1e5130111::Convolution2dTest48 static TensorShape GetWeightsShape() { return TensorShape( {4, 2, 2, 3}); } // CoutHWCin
49
50 constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
51 constexpr static const unsigned int outputSize = 36; // batchOut * heightOut * widthOut * channelOut
52
AddReceiverLayer__anond1a1e5130111::Convolution2dTest53 static IConnectableLayer* AddReceiverLayer(INetwork* network,
54 const char* name,
55 float scale = 1.f,
56 int32_t offset = 0)
57 {
58 IgnoreUnused(scale);
59 IgnoreUnused(offset);
60
61 Convolution2dDescriptor descriptor;
62 descriptor.m_DataLayout = DataLayout::NHWC;
63 descriptor.m_StrideX = 1;
64 descriptor.m_StrideY = 1;
65
66 return network->AddConvolution2dLayer(descriptor, name);
67 }
68
AddConstantLayers__anond1a1e5130111::Convolution2dTest69 static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
70 float scale = 1.f,
71 int32_t offset = 0)
72 {
73
74 std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
75 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
76 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
77 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42 };
78 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
79 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
80 ConstTensor weights(weightsInfo, weightsVector);
81
82 IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
83 weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
84
85 std::vector<IConnectableLayer*> layers = { weightsLayer };
86 return layers;
87 }
88 };
89
90 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
91 struct DWConvolution2dTest
92 {
93 public:
94 using LayerType = DepthwiseConvolution2dLayer;
95 static const bool isElementWise = false;
96 static const bool isConstTensorAsInputSupported = true;
97
GetInputShape__anond1a1e5130111::DWConvolution2dTest98 static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // [N,H,W,Cin]
GetOutputShape__anond1a1e5130111::DWConvolution2dTest99 static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 12}); } // [N,H,W,Cout]
GetWeightsShape__anond1a1e5130111::DWConvolution2dTest100 static TensorShape GetWeightsShape() { return TensorShape( {1, 2, 2, 12}); } // [1,H,W,Cout]
101
102 constexpr static const unsigned int inputSize = 48; //batchIn * heightIn * widthIn * channelIn;
103 constexpr static const unsigned int outputSize = 108; //batchOut * heightOut * widthOut * channelOut;
104
AddReceiverLayer__anond1a1e5130111::DWConvolution2dTest105 static IConnectableLayer* AddReceiverLayer(INetwork* network,
106 const char* name,
107 float scale = 1.f,
108 int32_t offset = 0)
109 {
110 IgnoreUnused(scale);
111 IgnoreUnused(offset);
112
113 DepthwiseConvolution2dDescriptor descriptor;
114 descriptor.m_BiasEnabled = false;
115 descriptor.m_DataLayout = DataLayout::NHWC;
116 descriptor.m_StrideX = 1;
117 descriptor.m_StrideY = 1;
118
119 return network->AddDepthwiseConvolution2dLayer(descriptor, name);
120 }
121
AddConstantLayers__anond1a1e5130111::DWConvolution2dTest122 static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
123 float scale = 1.f,
124 int32_t offset = 0)
125 {
126 std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
127 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
128 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
129 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
130 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
131 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
132 ConstTensor weights(weightsInfo, weightsVector);
133
134 IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
135 weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
136
137 std::vector<IConnectableLayer*> layers = { weightsLayer };
138 return layers;
139 }
140 };
141
142 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
143 struct FullyConnectedTest
144 {
145 public:
146 using LayerType = FullyConnectedLayer;
147 static const bool isElementWise = false;
148 static const bool isConstTensorAsInputSupported = true;
149
GetInputShape__anond1a1e5130111::FullyConnectedTest150 static TensorShape GetInputShape() { return TensorShape( {2, 5, 1, 1}); } // NCinHW
GetOutputShape__anond1a1e5130111::FullyConnectedTest151 static TensorShape GetOutputShape() { return TensorShape( {2, 3}); } // NCout
GetWeightsShape__anond1a1e5130111::FullyConnectedTest152 static TensorShape GetWeightsShape() { return TensorShape( {5, 3}); } // CinCout
153
154 constexpr static const unsigned int inputSize = 10; // batchIn * heightIn * widthIn * channelIn
155 constexpr static const unsigned int outputSize = 6; // batchOut * heightOut * widthOut * channelOut
156
AddReceiverLayer__anond1a1e5130111::FullyConnectedTest157 static IConnectableLayer* AddReceiverLayer(INetwork* network,
158 const char* name,
159 float scale = 1.f,
160 int32_t offset = 0)
161 {
162 IgnoreUnused(scale);
163 IgnoreUnused(offset);
164
165 FullyConnectedDescriptor descriptor;
166 descriptor.m_BiasEnabled = false;
167
168 return network->AddFullyConnectedLayer(descriptor, name);
169 }
170
AddConstantLayers__anond1a1e5130111::FullyConnectedTest171 static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
172 float scale = 1.f,
173 int32_t offset = 0)
174 {
175 std::vector<float> weightsData = { 1, 2, 3, 4, 5,
176 6, 7, 8, 9, 10,
177 11, 12, 13, 14, 15};
178 std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
179 TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
180 ConstTensor weights(weightsInfo, weightsVector);
181
182 IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
183 weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
184
185 std::vector<IConnectableLayer*> layers = { weightsLayer };
186 return layers;
187 }
188 };
189
190 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
191 struct BatchNormTest
192 {
193 public:
194 using LayerType = BatchNormalizationLayer;
195 static const bool isElementWise = false;
196 static const bool isConstTensorAsInputSupported = false;
197
GetInputShape__anond1a1e5130111::BatchNormTest198 static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
GetOutputShape__anond1a1e5130111::BatchNormTest199 static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
200
201 constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
202 constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
203
AddReceiverLayer__anond1a1e5130111::BatchNormTest204 static IConnectableLayer* AddReceiverLayer(INetwork* network,
205 const char* name,
206 float scale = 1.f,
207 int32_t offset = 0)
208 {
209 IgnoreUnused(scale);
210 IgnoreUnused(offset);
211
212 BatchNormalizationDescriptor descriptor;
213 descriptor.m_DataLayout = DataLayout::NHWC;
214
215 std::vector<T> betaVector = GetVector<T>(GetOutputShape()[3], 0.0f, 0.2f);
216 std::vector<T> gammaVector = GetVector<T>(GetOutputShape()[3], 0.5f, 0.1f);
217 std::vector<T> meanVector = GetVector<T>(GetOutputShape()[3], 0.1f, 0.1f);
218 std::vector<T> varianceVector = GetVector<T>(GetOutputShape()[3], 1.0f, 0.1f);
219
220 const unsigned int outputChannelSize[] = { GetOutputShape()[3] };
221 ConstTensor beta(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), betaVector);
222 ConstTensor gamma(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), gammaVector);
223 ConstTensor mean(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), meanVector);
224 ConstTensor variance(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), varianceVector);
225
226 return network->AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma, name);
227 }
228
AddConstantLayers__anond1a1e5130111::BatchNormTest229 static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
230 float scale = 1.f,
231 int32_t offset = 0)
232 {
233 IgnoreUnused(network);
234 IgnoreUnused(scale);
235 IgnoreUnused(offset);
236 return {};
237 }
238 };
239
240 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
241 struct MultiplicationTest
242 {
243 using LayerType = MultiplicationLayer;
244 static const bool isElementWise = true;
245 static const bool isConstTensorAsInputSupported = false;
246
GetInputShape__anond1a1e5130111::MultiplicationTest247 static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
GetOutputShape__anond1a1e5130111::MultiplicationTest248 static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
249
250 constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
251 constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
252
AddReceiverLayer__anond1a1e5130111::MultiplicationTest253 static IConnectableLayer* AddReceiverLayer(INetwork* network,
254 const char* name,
255 float scale = 1.f,
256 int32_t offset = 0)
257 {
258 IgnoreUnused(scale);
259 IgnoreUnused(offset);
260
261 ARMNN_NO_DEPRECATE_WARN_BEGIN
262 return network->AddMultiplicationLayer(name);
263 ARMNN_NO_DEPRECATE_WARN_END
264 }
265
AddConstantLayers__anond1a1e5130111::MultiplicationTest266 static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
267 float scale = 1.f,
268 int32_t offset = 0)
269 {
270 IgnoreUnused(network);
271 IgnoreUnused(scale);
272 IgnoreUnused(offset);
273 return {};
274 }
275 };
276
277 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
278 struct AdditionTest
279 {
280 using LayerType = AdditionLayer;
281 static const bool isElementWise = true;
282 static const bool isConstTensorAsInputSupported = false;
283
GetInputShape__anond1a1e5130111::AdditionTest284 static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
GetOutputShape__anond1a1e5130111::AdditionTest285 static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
286
287 constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
288 constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
289
AddReceiverLayer__anond1a1e5130111::AdditionTest290 static IConnectableLayer* AddReceiverLayer(INetwork* network,
291 const char* name,
292 float scale = 1.f,
293 int32_t offset = 0)
294 {
295 IgnoreUnused(scale);
296 IgnoreUnused(offset);
297
298 ARMNN_NO_DEPRECATE_WARN_BEGIN
299 return network->AddAdditionLayer(name);
300 ARMNN_NO_DEPRECATE_WARN_END
301 }
302
AddConstantLayers__anond1a1e5130111::AdditionTest303 static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
304 float scale = 1.f,
305 int32_t offset = 0)
306 {
307 IgnoreUnused(network);
308 IgnoreUnused(scale);
309 IgnoreUnused(offset);
310 return {};
311 }
312 };
313
314 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
315 struct SubtractionTest
316 {
317 using LayerType = SubtractionLayer;
318 static const bool isElementWise = true;
319 static const bool isConstTensorAsInputSupported = false;
320
GetInputShape__anond1a1e5130111::SubtractionTest321 static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
GetOutputShape__anond1a1e5130111::SubtractionTest322 static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
323
324 constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
325 constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
326
AddReceiverLayer__anond1a1e5130111::SubtractionTest327 static IConnectableLayer* AddReceiverLayer(INetwork* network,
328 const char* name,
329 float scale = 1.f,
330 int32_t offset = 0)
331 {
332 IgnoreUnused(scale);
333 IgnoreUnused(offset);
334
335 ARMNN_NO_DEPRECATE_WARN_BEGIN
336 return network->AddSubtractionLayer(name);
337 ARMNN_NO_DEPRECATE_WARN_END
338 }
339
AddConstantLayers__anond1a1e5130111::SubtractionTest340 static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
341 float scale = 1.f,
342 int32_t offset = 0)
343 {
344 IgnoreUnused(network);
345 IgnoreUnused(scale);
346 IgnoreUnused(offset);
347 return {};
348 }
349 };
350
351 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
352 struct DivisionTest
353 {
354 using LayerType = DivisionLayer;
355 static const bool isElementWise = true;
356 static const bool isConstTensorAsInputSupported = false;
357
GetInputShape__anond1a1e5130111::DivisionTest358 static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
GetOutputShape__anond1a1e5130111::DivisionTest359 static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
360
361 constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
362 constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
363
AddReceiverLayer__anond1a1e5130111::DivisionTest364 static IConnectableLayer* AddReceiverLayer(INetwork* network,
365 const char* name,
366 float scale = 1.f,
367 int32_t offset = 0)
368 {
369 IgnoreUnused(scale);
370 IgnoreUnused(offset);
371
372 ARMNN_NO_DEPRECATE_WARN_BEGIN
373 return network->AddDivisionLayer(name);
374 ARMNN_NO_DEPRECATE_WARN_END
375 }
376
AddConstantLayers__anond1a1e5130111::DivisionTest377 static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
378 float scale = 1.f,
379 int32_t offset = 0)
380 {
381 IgnoreUnused(network);
382 IgnoreUnused(scale);
383 IgnoreUnused(offset);
384 return {};
385 }
386 };
387
// Builds a network of the form:
//     Input -> receiver (LayerTest) -> Activation -> Output
// with optional extras driven by LayerTest's traits:
//   * constant weight layers wired into the receiver's slots 1..N when the
//     receiver takes its weights as inputs (isConstTensorAsInputSupported);
//   * the input also feeding slot 1 of the receiver for element-wise layers;
//   * a second Output connected directly to the receiver when preventFusing
//     is set, giving the receiver two consumers so the optimizer cannot
//     fold the activation into it.
// scale/offset are the quantization parameters applied to all tensor infos.
template<typename LayerTest,
         DataType ArmnnType>
INetworkPtr CreateNetwork(ActivationDescriptor activationDescriptor, bool preventFusing,
                          float scale, int32_t offset)
{
    // Create a network
    INetworkPtr network = INetwork::Create();

    IConnectableLayer* inputLayer = network->AddInputLayer(0);

    IConnectableLayer* receiverLayer = LayerTest::AddReceiverLayer(network.get(),
                                                                   "receiverLayer",
                                                                   scale,
                                                                   offset);

    IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
                                                                     "activation");

    IConnectableLayer* outputLayer = network->AddOutputLayer(0);
    IConnectableLayer* output2Layer = preventFusing ? network->AddOutputLayer(1) : nullptr;

    // If ConstTensorAsInputs is supported weights and bias are stored as constant layers.
    if (LayerTest::isConstTensorAsInputSupported)
    {
        std::vector<IConnectableLayer*> constantLayers = LayerTest::AddConstantLayers(network.get(),
                                                                                      scale,
                                                                                      offset);

        // Connect constant layers to receiverLayer.
        // Slot 0 is reserved for the data input; constants start at slot 1.
        for (unsigned int i = 0; i < constantLayers.size(); ++i)
        {
            constantLayers[i]->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(i + 1));
        }
    }

    // Define layers information
    TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
    TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);

    // Set layer information
    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    receiverLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    activationLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // Connect layers
    inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(0));
    receiverLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
    activationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    if (LayerTest::isElementWise)
    {
        // Element-wise receivers need two inputs; feed the same input to both.
        inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(1));
    }
    if (preventFusing)
    {
        // A second consumer of the receiver's output blocks activation fusion.
        receiverLayer->GetOutputSlot(0).Connect(output2Layer->GetInputSlot(0));
    }

    return network;
}
448
// End-to-end verification that the optimizer fuses an activation into the
// preceding receiver layer, and that fusion does not change the numerics:
//   1. Build, optimize and run a fusable network; assert the optimized graph
//      contains a single receiver named "fused-activation-into-receiverLayer"
//      and no standalone ActivationLayer.
//   2. Build the same network with fusing prevented (second output on the
//      receiver), optimize and run it; assert the ActivationLayer survived.
//   3. Compare the two networks' outputs element-wise within 'tolerance'.
template<typename LayerTest,
         DataType ArmnnType,
         typename LayerType = typename LayerTest::LayerType,
         typename T = ResolveType<ArmnnType>>
void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance, Compute backendId,
                                         float scale = 1.f, int32_t offset=0)
{
    // FIRST NETWORK: Fused
    // Construct ArmNN network
    INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);

    // Create ArmNN runtime
    IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options

    // Optimise ArmNN network
    IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());

    Graph& graphFused = GetGraphForTesting(optNetFused.get());

    // The fusion optimization renames the receiver with this fixed pattern.
    auto checkFusedConv2d = [](const Layer* const layer)->bool {
        return IsLayerOfType<LayerType>(layer) &&
               (layer->GetNameStr() == "fused-activation-into-receiverLayer");
    };

    // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
    if(LayerTest::isConstTensorAsInputSupported)
    {
        CHECK(4 == graphFused.GetNumLayers());
        CHECK(CheckSequence(graphFused.cbegin(),
                            graphFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<ConstantLayer>,
                            checkFusedConv2d,
                            &IsLayerOfType<OutputLayer>));

        // Check if new constant layer is connected to fused receiver layer.
        Layer* fusedReceiverLayer = GetFirstLayerWithName(graphFused, "fused-activation-into-receiverLayer");
        CHECK(fusedReceiverLayer);
        CHECK(fusedReceiverLayer->GetInputSlot(1).GetConnection() != nullptr);
    }
    else
    {
        CHECK(3 == graphFused.GetNumLayers());
        CHECK(CheckSequence(graphFused.cbegin(),
                            graphFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            checkFusedConv2d,
                            &IsLayerOfType<OutputLayer>));
    }

    // Load network into runtime
    NetworkId networkIdentifier;
    CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);

    //Creates structures for inputs and outputs.
    std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
    std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
    std::vector<T> outputDataFused(LayerTest::outputSize);

    armnn::TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
    inputTensorInfo.SetConstant(true);

    InputTensors inputTensorsFused{
        {0, ConstTensor(inputTensorInfo, inputDataFused.data())}};
    OutputTensors outputTensorsFused{
        {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};

    // Execute network
    CHECK(run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused) == Status::Success);

    // SECOND NETWORK: NotFused
    // Construct ArmNN network (preventFusing = true adds the second output)
    INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, true, scale, offset);

    // Create ArmNN runtime
    IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options

    // Optimise ArmNN network
    IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {backendId}, runNotFused->GetDeviceSpec());

    Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get());

    // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
    if(LayerTest::isConstTensorAsInputSupported)
    {
        CHECK(6 == graphNotFused.GetNumLayers());
        CHECK(CheckSequence(graphNotFused.cbegin(),
                            graphNotFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<ConstantLayer>,
                            &IsLayerOfType<LayerType>,
                            &IsLayerOfType<ActivationLayer>,
                            &IsLayerOfType<OutputLayer>,
                            &IsLayerOfType<OutputLayer>));
    }
    else
    {
        CHECK(5 == graphNotFused.GetNumLayers());
        CHECK(CheckSequence(graphNotFused.cbegin(),
                            graphNotFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<LayerType>,
                            &IsLayerOfType<ActivationLayer>,
                            &IsLayerOfType<OutputLayer>,
                            &IsLayerOfType<OutputLayer>));
    }

    // Load network into runtime
    NetworkId networkIdentifierNotFused;
    CHECK(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);

    //Creates structures for inputs and outputs. Reuses 'data' from above so
    // both networks receive identical input values.
    std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
    std::vector<T> outputDataNotFused(LayerTest::outputSize);
    std::vector<T> outputData2NotFused(LayerTest::outputSize);

    TensorInfo inputTensorInfoNotFused = runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0);
    inputTensorInfoNotFused.SetConstant(true);

    InputTensors inputTensorsNotFused{
        {0, ConstTensor(inputTensorInfoNotFused, inputDataNotFused.data())}};
    OutputTensors outputTensorsNotFused{
        {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
        {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};

    // Execute network
    CHECK(runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused)
          == Status::Success);

    // Check the output of the fused-activation matches with the output of the activation in the "NotFused" network
    for (unsigned int n = 0; n < outputDataFused.size(); ++n)
    {
        auto outputNotFused = static_cast<float>(outputDataNotFused[n]);
        CHECK(static_cast<float>(outputDataFused[n]) == doctest::Approx(outputNotFused).epsilon(tolerance));
    }
}
585
586 template<typename LayerTest,
587 DataType ArmnnType,
588 typename LayerType = typename LayerTest::LayerType,
589 typename T = ResolveType<ArmnnType>>
FuseActivationSimpleTest(ActivationDescriptor activationDescriptor,Compute backendId,float scale=1.f,int32_t offset=0)590 bool FuseActivationSimpleTest(ActivationDescriptor activationDescriptor, Compute backendId,
591 float scale = 1.f, int32_t offset = 0)
592 {
593 bool success;
594 try
595 {
596 // Construct ArmNN network
597 INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
598
599 // Create ArmNN runtime
600 IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
601
602 // Optimise ArmNN network
603 IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());
604
605 // Load network into runtime
606 NetworkId networkIdentifier;
607 CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);
608
609 //Creates structures for inputs and outputs.
610 std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
611 std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
612 std::vector<T> outputDataFused(LayerTest::outputSize);
613
614 TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
615 inputTensorInfo.SetConstant(true);
616
617 InputTensors inputTensorsFused{
618 {0, ConstTensor(inputTensorInfo, inputDataFused.data())}};
619 OutputTensors outputTensorsFused{
620 {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
621
622 // Execute network
623 run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
624
625 success = true;
626 }
627 catch (const std::exception& e)
628 {
629 std::cerr << e.what() << std::endl;
630 success = false;
631 }
632
633 return success;
634 }
635
636 }
637
638 #if defined(ARMCOMPUTENEON_ENABLED)
639 TEST_SUITE("Optimizer")
640 {
// ReLu fused into Receiver Layers Float32
TEST_CASE("FuseReLUIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoDWConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoFullyConnectedFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoBatchNormFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// BoundedReLu fused into Receiver Layers Float32
// m_A / m_B are the upper / lower clamp bounds of BoundedReLu.
TEST_CASE("FuseBoundedReLUIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDWConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::Float32 > , DataType::Float32 >
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoBatchNormFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
716
// ReLU fused into Receiver Layers QAsymmU8
TEST_CASE("FuseReLUIntoConvQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoDWConvQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// BoundedReLu fused into Receiver Layers QAsymmS8
// m_A = 6, m_B = 0 gives a ReLU6-style clamp.
TEST_CASE("FuseBoundedReLUIntoConvQASymmS8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDWConvQASymmS8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > , DataType::QAsymmS8 >
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoFullyConnectedQASymmS8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
774
// TanH fused into Receiver Layers Float32
TEST_CASE("FuseTanHIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::TanH;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// HardSwish fused into Receiver Layers Float32
TEST_CASE("FuseHardSwishIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::HardSwish;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
794
// Test that all receiver layers follow by all activation layers work, either fused or not fused.
// Iterates i over the first 12 ActivationFunction enum values; m_A/m_B are
// set unconditionally and only apply to parameterised functions.
TEST_CASE("LayerFollowedByActivationFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    for (int i = 0; i != 12; ++i)
    {
        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
        activationDescriptor.m_A = 1.0f;
        activationDescriptor.m_B = -1.0f;
        CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
    }
}
814 TEST_CASE("LayerFollowedByActivationFloat16CpuAccTest")
815 {
816 ActivationDescriptor activationDescriptor;
817 for (int i = 0; i != 12; ++i)
818 {
819 activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
820 activationDescriptor.m_A = 1.0f;
821 activationDescriptor.m_B = -1.0f;
822 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
823 (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
824 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
825 (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
826 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
827 (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
828 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
829 (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
830 }
831 }
832 TEST_CASE("LayerFollowedByActivationQAsymmU8CpuAccTest")
833 {
834 ActivationDescriptor activationDescriptor;
835
836 activationDescriptor.m_Function = ActivationFunction::Sigmoid;
837 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
838 (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
839 static_cast<int>(activationDescriptor.m_Function));
840 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
841 (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
842 static_cast<int>(activationDescriptor.m_Function));
843
844 activationDescriptor.m_Function = ActivationFunction::TanH;
845 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
846 (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
847 static_cast<int>(activationDescriptor.m_Function));
848 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
849 (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
850 static_cast<int>(activationDescriptor.m_Function));
851
852 activationDescriptor.m_Function = ActivationFunction::ReLu;
853 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
854 (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
855 static_cast<int>(activationDescriptor.m_Function));
856 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
857 (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
858 static_cast<int>(activationDescriptor.m_Function));
859
860 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
861 activationDescriptor.m_A = 1.0f;
862 activationDescriptor.m_B = -1.0f;
863 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
864 (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
865 static_cast<int>(activationDescriptor.m_Function));
866 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
867 (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
868 static_cast<int>(activationDescriptor.m_Function));
869
870 activationDescriptor.m_Function = ActivationFunction::HardSwish;
871 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
872 (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
873 static_cast<int>(activationDescriptor.m_Function));
874 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
875 (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
876 static_cast<int>(activationDescriptor.m_Function));
877 }
878 }
879 #endif
880
881 #if defined(ARMCOMPUTECL_ENABLED)
882 TEST_SUITE("Optimizer")
883 {
884 // ReLu fused into Receiver Layers Float32
885 TEST_CASE("FuseReLUIntoConvFloat32GpuAccTest")
886 {
887 ActivationDescriptor activationDescriptor;
888 activationDescriptor.m_Function = ActivationFunction::ReLu;
889
890 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
891 (activationDescriptor, 0.0001f, Compute::GpuAcc);
892 }
893 TEST_CASE("FuseReLUIntoDWConvFloat32GpuAccTest")
894 {
895 ActivationDescriptor activationDescriptor;
896 activationDescriptor.m_Function = ActivationFunction::ReLu;
897
898 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
899 (activationDescriptor, 0.0001f, Compute::GpuAcc);
900 }
901 TEST_CASE("FuseReLUIntoFullyConnectedFloat32GpuAccTest")
902 {
903 ActivationDescriptor activationDescriptor;
904 activationDescriptor.m_Function = ActivationFunction::ReLu;
905
906 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
907 (activationDescriptor, 0.0001f, Compute::GpuAcc);
908 }
909 TEST_CASE("FuseReLUIntoBatchNormFloat32GpuAccTest")
910 {
911 ActivationDescriptor activationDescriptor;
912 activationDescriptor.m_Function = ActivationFunction::ReLu;
913
914 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
915 (activationDescriptor, 0.0001f, Compute::GpuAcc);
916 }
917 TEST_CASE("FuseReLUIntoMulFloat32GpuAccTest")
918 {
919 ActivationDescriptor activationDescriptor;
920 activationDescriptor.m_Function = ActivationFunction::ReLu;
921
922 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
923 (activationDescriptor, 0.0001f, Compute::GpuAcc);
924 }
925 TEST_CASE("FuseReLUIntoAddFloat32GpuAccTest")
926 {
927 ActivationDescriptor activationDescriptor;
928 activationDescriptor.m_Function = ActivationFunction::ReLu;
929
930 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
931 (activationDescriptor, 0.0001f, Compute::GpuAcc);
932 }
933 TEST_CASE("FuseReLUIntoSubFloat32GpuAccTest")
934 {
935 ActivationDescriptor activationDescriptor;
936 activationDescriptor.m_Function = ActivationFunction::ReLu;
937
938 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
939 (activationDescriptor, 0.0001f, Compute::GpuAcc);
940 }
941 TEST_CASE("FuseReLUIntoDivFloat32GpuAccTest")
942 {
943 ActivationDescriptor activationDescriptor;
944 activationDescriptor.m_Function = ActivationFunction::ReLu;
945
946 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
947 (activationDescriptor, 0.0001f, Compute::GpuAcc);
948 }
949
950 // BoundedReLu fused into Receiver Layers Float32
951 TEST_CASE("FuseBoundedReLUIntoConvFloat32GpuAccTest")
952 {
953 ActivationDescriptor activationDescriptor;
954 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
955 activationDescriptor.m_A = 1.0f;
956 activationDescriptor.m_B = -1.0f;
957
958 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
959 (activationDescriptor, 0.0001f, Compute::GpuAcc);
960 }
961 TEST_CASE("FuseBoundedReLUIntoDWConvFloat32GpuAccTest")
962 {
963 ActivationDescriptor activationDescriptor;
964 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
965 activationDescriptor.m_A = 1.0f;
966 activationDescriptor.m_B = -1.0f;
967
968 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
969 (activationDescriptor, 0.0001f, Compute::GpuAcc);
970 }
971 TEST_CASE("FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest")
972 {
973 ActivationDescriptor activationDescriptor;
974 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
975 activationDescriptor.m_A = 1.0f;
976 activationDescriptor.m_B = -1.0f;
977
978 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
979 (activationDescriptor, 0.0001f, Compute::GpuAcc);
980 }
981 TEST_CASE("FuseBoundedReLUIntoBatchNormFloat32GpuAccTest")
982 {
983 ActivationDescriptor activationDescriptor;
984 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
985 activationDescriptor.m_A = 1.0f;
986 activationDescriptor.m_B = -1.0f;
987
988 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
989 (activationDescriptor, 0.0001f, Compute::GpuAcc);
990 }
991 TEST_CASE("FuseBoundedReLUIntoMulFloat32GpuAccTest")
992 {
993 ActivationDescriptor activationDescriptor;
994 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
995 activationDescriptor.m_A = 1.0f;
996 activationDescriptor.m_B = -1.0f;
997
998 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
999 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1000 }
1001 TEST_CASE("FuseBoundedReLUIntoAddFloat32GpuAccTest")
1002 {
1003 ActivationDescriptor activationDescriptor;
1004 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1005 activationDescriptor.m_A = 1.0f;
1006 activationDescriptor.m_B = -1.0f;
1007
1008 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
1009 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1010 }
1011 TEST_CASE("FuseBoundedReLUIntoSubFloat32GpuAccTest")
1012 {
1013 ActivationDescriptor activationDescriptor;
1014 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1015 activationDescriptor.m_A = 1.0f;
1016 activationDescriptor.m_B = -1.0f;
1017
1018 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1019 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1020 }
1021 TEST_CASE("FuseBoundedReLUIntoDivFloat32GpuAccTest")
1022 {
1023 ActivationDescriptor activationDescriptor;
1024 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1025 activationDescriptor.m_A = 1.0f;
1026 activationDescriptor.m_B = -1.0f;
1027
1028 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1029 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1030 }
1031
1032 // ReLu fused into Receiver Layers Float16
1033 TEST_CASE("FuseReLUIntoConvFloat16GpuAccTest")
1034 {
1035 ActivationDescriptor activationDescriptor;
1036 activationDescriptor.m_Function = ActivationFunction::ReLu;
1037
1038 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
1039 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1040 }
1041 TEST_CASE("FuseReLUIntoDWConvFloat16GpuAccTest")
1042 {
1043 ActivationDescriptor activationDescriptor;
1044 activationDescriptor.m_Function = ActivationFunction::ReLu;
1045
1046 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
1047 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1048 }
1049 TEST_CASE("FuseReLUIntoFullyConnectedFloat16GpuAccTest")
1050 {
1051 ActivationDescriptor activationDescriptor;
1052 activationDescriptor.m_Function = ActivationFunction::ReLu;
1053
1054 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
1055 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1056 }
1057 TEST_CASE("FuseReLUIntoBatchNormFloat16GpuAccTest")
1058 {
1059 ActivationDescriptor activationDescriptor;
1060 activationDescriptor.m_Function = ActivationFunction::ReLu;
1061
1062 FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float16>, DataType::Float16>
1063 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1064 }
1065 TEST_CASE("FuseReLUIntoMulFloat16GpuAccTest")
1066 {
1067 ActivationDescriptor activationDescriptor;
1068 activationDescriptor.m_Function = ActivationFunction::ReLu;
1069
1070 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
1071 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1072 }
1073 TEST_CASE("FuseReLUIntoAddFloat16GpuAccTest")
1074 {
1075 ActivationDescriptor activationDescriptor;
1076 activationDescriptor.m_Function = ActivationFunction::ReLu;
1077
1078 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float16>, DataType::Float16>
1079 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1080 }
1081 TEST_CASE("FuseReLUIntoSubFloat16GpuAccTest")
1082 {
1083 ActivationDescriptor activationDescriptor;
1084 activationDescriptor.m_Function = ActivationFunction::ReLu;
1085
1086 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float16>, DataType::Float16>
1087 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1088 }
1089 TEST_CASE("FuseReLUIntoDivFloat16GpuAccTest")
1090 {
1091 ActivationDescriptor activationDescriptor;
1092 activationDescriptor.m_Function = ActivationFunction::ReLu;
1093
1094 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float16>, DataType::Float16>
1095 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1096 }
1097
1098 // ReLU fused into Receiver Layers QAsymmU8
1099 TEST_CASE("FuseReLUQIntoConvAsymmU8GpuAccTest")
1100 {
1101 ActivationDescriptor activationDescriptor;
1102 activationDescriptor.m_Function = ActivationFunction::ReLu;
1103
1104 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1105 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1106 }
1107 TEST_CASE("FuseReLUQIntoDWConvAsymmU8GpuAccTest")
1108 {
1109 ActivationDescriptor activationDescriptor;
1110 activationDescriptor.m_Function = ActivationFunction::ReLu;
1111
1112 FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1113 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1114 }
1115 TEST_CASE("FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest")
1116 {
1117 ActivationDescriptor activationDescriptor;
1118 activationDescriptor.m_Function = ActivationFunction::ReLu;
1119
1120 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1121 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1122 }
1123
1124 // BoundedReLu fused into Receiver Layers QAsymmS8
1125 TEST_CASE("FuseBoundedReLUIntoConvQASymmS8GpuAccTest")
1126 {
1127 ActivationDescriptor activationDescriptor;
1128 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1129 activationDescriptor.m_A = 6.0f;
1130 activationDescriptor.m_B = 0.0f;
1131
1132 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
1133 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1134 }
1135 TEST_CASE("FuseBoundedReLUIntoDWConvQASymmS8GpuAccTest")
1136 {
1137 ActivationDescriptor activationDescriptor;
1138 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1139 activationDescriptor.m_A = 6.0f;
1140 activationDescriptor.m_B = 0.0f;
1141
1142 FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > , DataType::QAsymmS8 >
1143 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1144 }
1145 TEST_CASE("FuseBoundedReLUIntoFullyConnectedQASymmS8GpuAccTest")
1146 {
1147 ActivationDescriptor activationDescriptor;
1148 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1149 activationDescriptor.m_A = 6.0f;
1150 activationDescriptor.m_B = 0.0f;
1151
1152 FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
1153 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1154 }
1155
1156 // TanH fused into Receiver Layers Float32
1157 TEST_CASE("FuseTanHIntoConvFloat32GpuAccTest")
1158 {
1159 ActivationDescriptor activationDescriptor;
1160 activationDescriptor.m_Function = ActivationFunction::TanH;
1161
1162 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1163 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1164 }
1165 TEST_CASE("FuseTanHIntoMulFloat32GpuAccTest")
1166 {
1167 ActivationDescriptor activationDescriptor;
1168 activationDescriptor.m_Function = ActivationFunction::TanH;
1169
1170 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1171 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1172 }
1173 TEST_CASE("FuseTanHIntoAddFloat32GpuAccTest")
1174 {
1175 ActivationDescriptor activationDescriptor;
1176 activationDescriptor.m_Function = ActivationFunction::TanH;
1177
1178 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
1179 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1180 }
1181 TEST_CASE("FuseTanHIntoSubFloat32GpuAccTest")
1182 {
1183 ActivationDescriptor activationDescriptor;
1184 activationDescriptor.m_Function = ActivationFunction::TanH;
1185
1186 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1187 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1188 }
1189 TEST_CASE("FuseTanHIntoDivFloat32GpuAccTest")
1190 {
1191 ActivationDescriptor activationDescriptor;
1192 activationDescriptor.m_Function = ActivationFunction::TanH;
1193
1194 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1195 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1196 }
1197
1198 // HardSwish fused into Receiver Layers Float32
1199 TEST_CASE("FuseHardSwishIntoConvFloat32GpuAccTest")
1200 {
1201 ActivationDescriptor activationDescriptor;
1202 activationDescriptor.m_Function = ActivationFunction::HardSwish;
1203
1204 FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1205 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1206 }
1207 TEST_CASE("FuseHardSwishIntoMulFloat32GpuAccTest")
1208 {
1209 ActivationDescriptor activationDescriptor;
1210 activationDescriptor.m_Function = ActivationFunction::HardSwish;
1211
1212 FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1213 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1214 }
1215 TEST_CASE("FuseHardSwishIntoAddFloat32GpuAccTest")
1216 {
1217 ActivationDescriptor activationDescriptor;
1218 activationDescriptor.m_Function = ActivationFunction::HardSwish;
1219
1220 FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
1221 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1222 }
1223 TEST_CASE("FuseHardSwishIntoSubFloat32GpuAccTest")
1224 {
1225 ActivationDescriptor activationDescriptor;
1226 activationDescriptor.m_Function = ActivationFunction::HardSwish;
1227
1228 FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1229 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1230 }
1231 TEST_CASE("FuseHardSwishIntoDivFloat32GpuAccTest")
1232 {
1233 ActivationDescriptor activationDescriptor;
1234 activationDescriptor.m_Function = ActivationFunction::HardSwish;
1235
1236 FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1237 (activationDescriptor, 0.0001f, Compute::GpuAcc);
1238 }
1239
1240 // Test that all receiver layers follow by all activation layers work, either fused or not fused
1241 TEST_CASE("LayerFollowedByActivationFloat32GpuAccTest")
1242 {
1243 ActivationDescriptor activationDescriptor;
1244 for (int i = 0; i != 12; ++i)
1245 {
1246 activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
1247 activationDescriptor.m_A = 1.0f;
1248 activationDescriptor.m_B = -1.0f;
1249 if (activationDescriptor.m_Function != ActivationFunction::Elu)
1250 {
1251 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1252 (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
1253 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
1254 (activationDescriptor, Compute::GpuAcc)), "DepthwiseConvolution + Activation function " << i);
1255 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
1256 (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
1257 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
1258 (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
1259 CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1260 (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
1261 CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>, DataType::Float32>
1262 (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
1263 CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1264 (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
1265 CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>, DataType::Float32>
1266 (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
1267 }
1268 }
1269 }
1270 TEST_CASE("LayerFollowedByActivationFloat16GpuAccTest")
1271 {
1272 ActivationDescriptor activationDescriptor;
1273 for (int i = 0; i != 12; ++i)
1274 {
1275 activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
1276 activationDescriptor.m_A = 1.0f;
1277 activationDescriptor.m_B = -1.0f;
1278 if (activationDescriptor.m_Function != ActivationFunction::Elu)
1279 {
1280 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
1281 (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
1282 CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
1283 (activationDescriptor, Compute::GpuAcc)), "Depthwise + Activation function " << i);
1284 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
1285 (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
1286 CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
1287 (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
1288 CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
1289 (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
1290 CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>, DataType::Float16>
1291 (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
1292 CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>, DataType::Float16>
1293 (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
1294 CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>, DataType::Float16>
1295 (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
1296 }
1297 }
1298 }
1299 TEST_CASE("LayerFollowedByActivationQAsymmU8GpuAccTest")
1300 {
1301 ActivationDescriptor activationDescriptor;
1302
1303 activationDescriptor.m_Function = ActivationFunction::Sigmoid;
1304 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1305 (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
1306 static_cast<int>(activationDescriptor.m_Function));
1307 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1308 (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
1309 static_cast<int>(activationDescriptor.m_Function));
1310
1311 activationDescriptor.m_Function = ActivationFunction::TanH;
1312 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1313 (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
1314 static_cast<int>(activationDescriptor.m_Function));
1315 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1316 (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
1317 static_cast<int>(activationDescriptor.m_Function));
1318
1319 activationDescriptor.m_Function = ActivationFunction::ReLu;
1320 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1321 (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1322 static_cast<int>(activationDescriptor.m_Function));
1323 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1324 (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1325 static_cast<int>(activationDescriptor.m_Function));
1326
1327 activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1328 activationDescriptor.m_A = 1.0f;
1329 activationDescriptor.m_B = -1.0f;
1330 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1331 (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1332 static_cast<int>(activationDescriptor.m_Function));
1333 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1334 (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1335 static_cast<int>(activationDescriptor.m_Function));
1336
1337 activationDescriptor.m_Function = ActivationFunction::HardSwish;
1338 CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1339 (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1340 static_cast<int>(activationDescriptor.m_Function));
1341 CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1342 (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1343 static_cast<int>(activationDescriptor.m_Function));
1344 }
1345 }
1346 #endif
1347