1 /** 2 * Copyright 2021-2022 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_LITE_TOOLS_OPTIMIZER_PARALLEL_SPLIT_STRATEGY_H_ 18 #define MINDSPORE_LITE_TOOLS_OPTIMIZER_PARALLEL_SPLIT_STRATEGY_H_ 19 #include <vector> 20 #include <string> 21 #include <set> 22 #include <utility> 23 #include <map> 24 #include <unordered_map> 25 #include "schema/ops_generated.h" 26 #include "mindspore/core/ops/conv_pool_ops.h" 27 #include "mindspore/core/ops/lite_ops.h" 28 #include "include/lite_types.h" 29 30 namespace mindspore { 31 namespace opt { 32 constexpr auto PARALLEL_NAME_SUFFIX = "_parallel"; 33 34 constexpr auto kParallelPrimitiveIndex = 0; 35 36 const std::vector<int64_t> kSplitDefaultRatio = {0, 0}; 37 38 // user's device to split, only split to cpu && gpu, no support npu 39 const std::vector<std::string> kSplitDevTypes = {"cpu", "gpu"}; 40 41 using Strategies = std::vector<std::vector<std::vector<int64_t>>>; 42 43 constexpr auto kDeviceTypeNone = -1; 44 // strategy format is NHWC-KHWC 45 constexpr int32_t kAxisN = 0; 46 constexpr int32_t kAxisCIn = 3; 47 constexpr int32_t kAxisCOut = 0; 48 constexpr int32_t kAxisH = 1; 49 constexpr int32_t kAxisW = 2; 50 51 constexpr auto kDefaultBatch = 1; 52 53 constexpr auto kShapeN = 0; 54 constexpr auto kShapeH = 1; 55 constexpr auto kShapeW = 2; 56 constexpr auto kShapeC = 3; 57 58 constexpr auto kIndexH = 0; 59 constexpr auto kIndexW = 1; 60 61 constexpr auto kPadUp = 0; 62 constexpr auto kPadDown = 1; 63 constexpr auto kPadLeft = 2; 64 constexpr auto kPadRight = 3; 65 66 enum SplitMode { 67 NoSplit = 0, 68 SplitN = 1, 69 SplitH = 2, 70 SplitCIN = 3, 71 SplitCOUT = 4, 72 }; 73 74 struct SplitStrategy { 75 Strategies strategys{}; 76 std::vector<std::string> dev_types{}; 77 size_t dev_num{0}; 78 SplitMode split_mode_{NoSplit}; 79 }; 80 81 // this is a map for key: <primitive,is_depth_wise> value: parallel_op_name 82 const std::map<std::pair<PrimitivePtr, bool>, std::string> kParallelOpNames = { 83 {{prim::kPrimConv2D, false}, "Conv2D"}, 84 {{prim::kPrimConv2DFusion, false}, "Conv2D"}, 85 {{prim::kPrimConv2D, true}, "DepthwiseConv2D"}, 86 {{prim::kPrimConv2DFusion, true}, "DepthwiseConv2D"}}; 87 88 const std::map<std::string, lite::DeviceType> kSupportSplitedDevices = { 89 {"cpu", lite::DeviceType::DT_CPU}, {"gpu", lite::DeviceType::DT_GPU}, {"npu", lite::DeviceType::DT_NPU}}; 90 91 // this is a map for key: primitive value: schema_primitive_id 92 const std::unordered_map<PrimitivePtr, std::pair<schema::PrimitiveType, TypeId>> kParallelSchemaId = { 93 {prim::kPrimConv2D, {schema::PrimitiveType_Conv2DFusion, kNumberTypeFloat32}}, 94 {prim::kPrimConv2DFusion, {schema::PrimitiveType_Conv2DFusion, kNumberTypeFloat32}}}; 95 96 // this is an artificial restriction that if user split conv, we limit total FLOPs bigger than 97 // 2 * output_H * output_W * (in_C * kW * kH +1) * out_C >= 100 98 // FLOPs ~= output_H * output_W * (in_C * kW * kH) * out_C 99 // FLOPs ~= (input_h/stride_h)*(input_w/stride_w)*in_C * kW * kH) * out_C 100 // etc. (12/1)*(12/1)*(1*3*3)*128/1024 = 162kFLPOs 101 constexpr auto kUserFLOPs = 100; 102 constexpr auto kPerFlops = 1024; 103 104 int64_t ApproximateFLOPs(const std::vector<int64_t> &strides, const std::vector<int64_t> &input_shae, 105 const std::vector<int64_t> &weight_shape); 106 107 std::unordered_map<std::string, opt::SplitStrategy> ParserSplitStrategy( 108 const std::vector<int64_t> ¶llel_compute_rates, const std::vector<std::string> ¶llel_devices, 109 SplitMode split_mode); 110 111 } // namespace opt 112 } // namespace mindspore 113 #endif // MINDSPORE_LITE_TOOLS_OPTIMIZER_PARALLEL_SPLIT_STRATEGY_H_ 114