• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NetworkUtils.hpp"
7 
8 #include "SubgraphViewSelector.hpp"
9 
10 #include <armnn/Exceptions.hpp>
11 #include <armnn/BackendRegistry.hpp>
12 
13 namespace armnn
14 {
15 
16 namespace
17 {
18 
UpdateOutputSlotToFp32(OutputSlot & outputSlot)19 void UpdateOutputSlotToFp32(OutputSlot& outputSlot)
20 {
21     const TensorInfo& origTensorInfo = outputSlot.GetTensorInfo();
22     TensorInfo newTensorInfo(origTensorInfo);
23     newTensorInfo.SetDataType(DataType::Float32);
24     outputSlot.SetTensorInfo(newTensorInfo);
25 }
26 
ChangeOutputBf16ToFp32(Layer & layer)27 void ChangeOutputBf16ToFp32(Layer& layer)
28 {
29     for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
30     {
31         if (outputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16)
32         {
33             UpdateOutputSlotToFp32(*outputSlot);
34         }
35     }
36 }
37 
ChangeOutputFp16ToFp32(Layer & layer)38 void ChangeOutputFp16ToFp32(Layer& layer)
39 {
40     for (auto&& outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
41     {
42         if (outputSlot->GetTensorInfo().GetDataType() == DataType::Float16)
43         {
44             UpdateOutputSlotToFp32(*outputSlot);
45         }
46     }
47 }
48 
49 } // anonymous namespace
50 
InsertConvertBf16ToFp32LayersBefore(Graph & graph,Layer & layer,bool expectCorrectInputType)51 std::vector<ConvertBf16ToFp32Layer*> InsertConvertBf16ToFp32LayersBefore(Graph& graph,
52                                                                          Layer& layer,
53                                                                          bool expectCorrectInputType)
54 {
55     std::vector<ConvertBf16ToFp32Layer*> convertLayers;
56     convertLayers.reserve(layer.GetNumInputSlots());
57 
58     // Insert a ConvertBf16ToFp32Layer before each input slot
59     for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
60     {
61         bool allowInsert = true;
62         if (expectCorrectInputType)
63         {
64             // Only insert ConvertBf16ToFp32Layer before BF16 input slots
65             OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
66             allowInsert =
67                 connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::BFloat16;
68         }
69 
70         if (allowInsert)
71         {
72             const std::string name =
73                 std::string("convert_bf16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
74                 layer.GetName();
75             ConvertBf16ToFp32Layer* convertLayer =
76                 graph.InsertNewLayer<ConvertBf16ToFp32Layer>(*inputSlot, name.c_str());
77 
78             TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
79             convertInfo.SetDataType(DataType::Float32);
80 
81             convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
82 
83             convertLayers.emplace_back(convertLayer);
84         }
85     }
86 
87     return convertLayers;
88 }
89 
InsertConvertFp32ToBf16LayersBefore(Graph & graph,Layer & layer,bool expectCorrectInputType)90 std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersBefore(Graph& graph,
91                                                                          Layer& layer,
92                                                                          bool expectCorrectInputType)
93 {
94     std::vector<ConvertFp32ToBf16Layer*> convertLayers;
95     convertLayers.reserve(layer.GetNumInputSlots());
96 
97     // Insert a ConvertFp32ToBf16Layer before each input slot
98     for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
99     {
100         bool allowInsert = true;
101         if (expectCorrectInputType)
102         {
103             // Only insert ConvertFp32ToBf16Layer before FP32 input slots
104             OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
105             allowInsert =
106                 connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float32;
107         }
108 
109         if (allowInsert)
110         {
111             const std::string name =
112                 std::string("convert_fp32_to_bf16-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
113                 layer.GetName();
114             ConvertFp32ToBf16Layer* convertLayer =
115                 graph.InsertNewLayer<ConvertFp32ToBf16Layer>(*inputSlot, name.c_str());
116 
117             TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
118             convertInfo.SetDataType(DataType::BFloat16);
119 
120             convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
121 
122             convertLayers.emplace_back(convertLayer);
123         }
124     }
125 
126     return convertLayers;
127 }
128 
InsertConvertFp16ToFp32LayersBefore(Graph & graph,Layer & layer,bool expectCorrectInputType)129 std::vector<ConvertFp16ToFp32Layer*> InsertConvertFp16ToFp32LayersBefore(Graph& graph,
130                                                                          Layer& layer,
131                                                                          bool expectCorrectInputType)
132 {
133     std::vector<ConvertFp16ToFp32Layer*> convertLayers;
134     convertLayers.reserve(layer.GetNumInputSlots());
135 
136     // Insert a ConvertFp16ToFp32Layer before each input slot
137     for (auto&& inputSlot = layer.BeginInputSlots(); inputSlot != layer.EndInputSlots(); ++inputSlot)
138     {
139         bool allowInsert = true;
140         if (expectCorrectInputType)
141         {
142             // Only insert ConvertFp16ToFp32Layer before FP16 input slots
143             OutputSlot* connectedOutputSlot = inputSlot->GetConnectedOutputSlot();
144             allowInsert =
145                 connectedOutputSlot && connectedOutputSlot->GetTensorInfo().GetDataType() == DataType::Float16;
146         }
147 
148         if (allowInsert)
149         {
150             const std::string name =
151                 std::string("convert_fp16_to_fp32-" + std::to_string(inputSlot->GetSlotIndex()) + "-") +
152                 layer.GetName();
153             ConvertFp16ToFp32Layer* convertLayer =
154                 graph.InsertNewLayer<ConvertFp16ToFp32Layer>(*inputSlot, name.c_str());
155 
156             TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
157             convertInfo.SetDataType(DataType::Float32);
158 
159             convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
160 
161             convertLayers.emplace_back(convertLayer);
162         }
163     }
164 
165     return convertLayers;
166 }
167 
InsertConvertFp32ToBf16LayersAfter(Graph & graph,Layer & layer)168 std::vector<ConvertFp32ToBf16Layer*> InsertConvertFp32ToBf16LayersAfter(Graph& graph, Layer& layer)
169 {
170     const unsigned int numOutputSlots = layer.GetNumOutputSlots();
171 
172     std::vector<ConvertFp32ToBf16Layer*> convertLayers;
173     convertLayers.reserve(numOutputSlots);
174 
175     // Update Bf16 output slots to FP32 on current layer
176     ChangeOutputBf16ToFp32(layer);
177 
178     // Insert a ConvertFp32ToBf16Layer after each FP32 output slot
179     for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
180     {
181         OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
182         if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
183         {
184             const std::string name =
185                 std::string("convert_fp32_to_bf16-" + std::to_string(slotIndex) + "-") + layer.GetName();
186             ConvertFp32ToBf16Layer* convertLayer =
187                 graph.InsertNewLayer<ConvertFp32ToBf16Layer>(outputSlot, name.c_str());
188 
189             TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
190             convertInfo.SetDataType(DataType::BFloat16);
191 
192             convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
193 
194             convertLayers.emplace_back(convertLayer);
195         }
196     }
197 
198     return convertLayers;
199 }
200 
InsertConvertFp32ToFp16LayersAfter(Graph & graph,Layer & layer)201 std::vector<ConvertFp32ToFp16Layer*> InsertConvertFp32ToFp16LayersAfter(Graph& graph, Layer& layer)
202 {
203     const unsigned int numOutputSlots = layer.GetNumOutputSlots();
204 
205     std::vector<ConvertFp32ToFp16Layer*> convertLayers;
206     convertLayers.reserve(numOutputSlots);
207 
208     // Update FP16 output slots to FP32 on current layer
209     ChangeOutputFp16ToFp32(layer);
210 
211     // Insert a ConvertFp32ToFp16Layer after each FP32 output slot
212     for (unsigned int slotIndex = 0u; slotIndex < numOutputSlots; ++slotIndex)
213     {
214         OutputSlot& outputSlot = layer.GetOutputSlot(slotIndex);
215         if(outputSlot.GetTensorInfo().GetDataType() == DataType::Float32)
216         {
217             const std::string name =
218                 std::string("convert_fp32_to_fp16-" + std::to_string(slotIndex) + "-") + layer.GetName();
219             ConvertFp32ToFp16Layer* convertLayer =
220                 graph.InsertNewLayer<ConvertFp32ToFp16Layer>(outputSlot, name.c_str());
221 
222             TensorInfo convertInfo = convertLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
223             convertInfo.SetDataType(DataType::Float16);
224 
225             convertLayer->GetOutputSlot().SetTensorInfo(convertInfo);
226 
227             convertLayers.emplace_back(convertLayer);
228         }
229     }
230 
231     return convertLayers;
232 }
233 
InsertDebugLayerAfter(Graph & graph,Layer & layer)234 std::vector<DebugLayer*> InsertDebugLayerAfter(Graph& graph, Layer& layer)
235 {
236     std::vector<DebugLayer*> debugLayers;
237     debugLayers.reserve(layer.GetNumOutputSlots());
238 
239     // Connect a DebugLayer to each output slot of the layer
240     for (auto outputSlot = layer.BeginOutputSlots(); outputSlot != layer.EndOutputSlots(); ++outputSlot)
241     {
242         const std::string debugName = std::string("DebugLayerAfter") + layer.GetNameStr();
243 
244         DebugLayer* debugLayer =
245             graph.InsertNewLayer<DebugLayer>(*outputSlot, debugName.c_str());
246 
247         // Sets output tensor info for the debug layer.
248         ARMNN_ASSERT(debugLayer->GetInputSlot(0).GetConnectedOutputSlot() == &(*outputSlot));
249         TensorInfo debugInfo = debugLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
250 
251         debugLayer->GetOutputSlot().SetTensorInfo(debugInfo);
252 
253         // NOTE: It is OK to do this because DebugLayer is only supported on CpuRef
254         debugLayer->SetBackendId(Compute::CpuRef);
255 
256         debugLayers.emplace_back(debugLayer);
257     }
258 
259     return debugLayers;
260 }
261 
262 } // namespace armnn
263