1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "NetworkUtils.hpp"
7
8 #include "SubgraphViewSelector.hpp"
9
10 #include <armnn/Exceptions.hpp>
11 #include <armnn/BackendRegistry.hpp>
12
13 namespace armnn
14 {
15
16 namespace
17 {
18
UpdateOutputSlotToFp32(OutputSlot & outputSlot)19 void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
20 {
21 const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
22 TensorInfo newTensorInfo(origTensorInfo);
23 newTensorInfo.SetDataType(DataType::Float32);
24 outputSlot.SetTensorInfo(newTensorInfo);
25 }
26
ChangeOutputBf16ToFp32(Layer & layer)27 void ChangeOutputBf16ToFp32(Layer& layer)
28 {
29 for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
30 {
31 if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
32 {
33 UpdateOutputSlotToFp32(*outputSlot);
34 }
35 }
36 }
37
ChangeOutputFp16ToFp32(Layer & layer)38 void ChangeOutputFp16ToFp32(Layer& layer)
39 {
40 for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
41 {
42 if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
43 {
44 UpdateOutputSlotToFp32(*outputSlot);
45 }
46 }
47 }
48
49 } // anonymous namespace
50
InsertConvertBf16ToFp32LayersBefore(Graph & graph,Layer & layer,bool expectCorrectInputType)51 std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
52 Layer& layer,
53 bool expectCorrectInputType)
54 {
55 std::vector<ConvertBf16ToFp32Layer*> convertLayers;
56 convertLayers.reserve(layer.GetNumInputSlots());
57
58 // Insert a ConvertBf16ToFp32Layer before each input slot
59 for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
60 {
61 bool allowInsert = true;
62 if (expectCorrectInputType)
63 {
64 // Only insert ConvertBf16ToFp32Layer before BF16 input slots
65 OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
66 allowInsert =
67 connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
68 }
69
70 if (allowInsert)
71 {
72 const std::string name =
73 std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
74 layer.GetName();
75 ConvertBf16ToFp32Layer* convertLayer =
76 graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());
77
78 TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
79 convertInfo.SetDataType(DataType::Float32);
80
81 convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
82
83 convertLayers.emplace_back(convertLayer);
84 }
85 }
86
87 return convertLayers;
88 }
89
InsertConvertFp32ToBf16LayersBefore(Graph & graph,Layer & layer,bool expectCorrectInputType)90 std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersBefore(Graph& graph,
91 Layer& layer,
92 bool expectCorrectInputType)
93 {
94 std::vector<ConvertFp32ToBf16Layer*> convertLayers;
95 convertLayers.reserve(layer.GetNumInputSlots());
96
97 // Insert a ConvertFp32ToBf16Layer before each input slot
98 for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
99 {
100 bool allowInsert = true;
101 if (expectCorrectInputType)
102 {
103 // Only insert ConvertFp32ToBf16Layer before FP32 input slots
104 OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
105 allowInsert =
106 connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
107 }
108
109 if (allowInsert)
110 {
111 const std::string name =
112 std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
113 layer.GetName();
114 ConvertFp32ToBf16Layer* convertLayer =
115 graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());
116
117 TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
118 convertInfo.SetDataType(DataType::BFloat16);
119
120 convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
121
122 convertLayers.emplace_back(convertLayer);
123 }
124 }
125
126 return convertLayers;
127 }
128
InsertConvertFp16ToFp32LayersBefore(Graph & graph,Layer & layer,bool expectCorrectInputType)129 std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
130 Layer& layer,
131 bool expectCorrectInputType)
132 {
133 std::vector<ConvertFp16ToFp32Layer*> convertLayers;
134 convertLayers.reserve(layer.GetNumInputSlots());
135
136 // Insert a ConvertFp16ToFp32Layer before each input slot
137 for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
138 {
139 bool allowInsert = true;
140 if (expectCorrectInputType)
141 {
142 // Only insert ConvertFp16ToFp32Layer before FP16 input slots
143 OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
144 allowInsert =
145 connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
146 }
147
148 if (allowInsert)
149 {
150 const std::string name =
151 std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
152 layer.GetName();
153 ConvertFp16ToFp32Layer* convertLayer =
154 graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
155
156 TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
157 convertInfo.SetDataType(DataType::Float32);
158
159 convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
160
161 convertLayers.emplace_back(convertLayer);
162 }
163 }
164
165 return convertLayers;
166 }
167
InsertConvertFp32ToBf16LayersAfter(Graph & graph,Layer & layer)168 std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
169 {
170 const unsigned int numOutputSlots = layer.GetNumOutputSlots();
171
172 std::vector<ConvertFp32ToBf16Layer*> convertLayers;
173 convertLayers.reserve(numOutputSlots);
174
175 // Update Bf16 output slots to FP32 on current layer
176 ChangeOutputBf16ToFp32(layer);
177
178 // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
179 for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
180 {
181 OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
182 if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
183 {
184 const std::string name =
185 std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
186 ConvertFp32ToBf16Layer* convertLayer =
187 graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());
188
189 TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
190 convertInfo.SetDataType(DataType::BFloat16);
191
192 convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
193
194 convertLayers.emplace_back(convertLayer);
195 }
196 }
197
198 return convertLayers;
199 }
200
InsertConvertFp32ToFp16LayersAfter(Graph & graph,Layer & layer)201 std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
202 {
203 const unsigned int numOutputSlots = layer.GetNumOutputSlots();
204
205 std::vector<ConvertFp32ToFp16Layer*> convertLayers;
206 convertLayers.reserve(numOutputSlots);
207
208 // Update FP16 output slots to FP32 on current layer
209 ChangeOutputFp16ToFp32(layer);
210
211 // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
212 for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
213 {
214 OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
215 if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
216 {
217 const std::string name =
218 std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
219 ConvertFp32ToFp16Layer* convertLayer =
220 graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
221
222 TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
223 convertInfo.SetDataType(DataType::Float16);
224
225 convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
226
227 convertLayers.emplace_back(convertLayer);
228 }
229 }
230
231 return convertLayers;
232 }
233
InsertDebugLayerAfter(Graph & graph,Layer & layer)234 std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer)
235 {
236 std::vector<DebugLayer*> debugLayers;
237 debugLayers.reserve(layer.GetNumOutputSlots());
238
239 // Connect a DebugLayer to each output slot of the layer
240 for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
241 {
242 const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr();
243
244 DebugLayer* debugLayer =
245 graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str());
246
247 // Sets output tensor info for the debug layer.
248 ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
249 TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
250
251 debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);
252
253 // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
254 debugLayer->SetBackendId(Compute::CpuRef);
255
256 debugLayers.emplace_back(debugLayer);
257 }
258
259 return debugLayers;
260 }
261
262 } // namespace armnn
263