1 /** 2 * Copyright 2020-2021 Huawei Technologies Co., Ltd 3 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 8 * http://www.apache.org/licenses/LICENSE-2.0 9 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_ 18 #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_ 19 20 #include <map> 21 #include <memory> 22 #include <string> 23 #include <unordered_map> 24 #include <unordered_set> 25 #include <utility> 26 #include <vector> 27 28 #include "backend/kernel_compiler/tbe/tbe_utils.h" 29 #include "backend/optimizer/somas/somas_node.h" 30 #include "backend/optimizer/somas/somas_solver_pre.h" 31 #include "backend/optimizer/somas/somas_stream.h" 32 #include "backend/optimizer/somas/somas_parameter.h" 33 #include "backend/session/anf_runtime_algorithm.h" 34 #include "backend/session/kernel_graph.h" 35 36 namespace mindspore { 37 namespace somas { 38 class Somas { 39 public: 40 // Constructors/Destructors 41 Somas() = default; 42 Somas(const Somas &) = delete; 43 Somas &operator=(const Somas &) = delete; 44 ~Somas() = default; 45 46 bool Allocate(const session::KernelGraph *graph); GetTotalMemSize()47 size_t GetTotalMemSize() { return mem_offset_; } set_mem_base_addr(uint8_t * mem_base_addr)48 void set_mem_base_addr(uint8_t *mem_base_addr) { mem_base_addr_ = mem_base_addr; } 49 uint8_t *GetNodeOutputPtr(const AnfNodePtr &node, size_t index) const; 50 uint8_t *GetNodeWorkSpacePtr(const AnfNodePtr &node, size_t index) const; 51 52 std::string SomasInfo(bool calc_hash = false) const; 53 std::string SomasMemory() const; 54 void DumpSomasInfoIR(const string filename) const; 55 void DumpSomasMemoryIR(const string &filename) const; 56 57 static bool NodeSort(const SomasNodePtr &node1, const SomasNodePtr &node2); 58 #ifndef ENABLE_SECURITY 59 void ConvertToProfilingNode(uint32_t graph_id); 60 #endif 61 62 private: 63 std::vector<DynamicBitSet> reuse_matrix_; 64 // hash id 65 std::string hash_id_; 66 // Maps 67 std::unordered_map<size_t, SomasTensorPtr> tensors_map_; 68 std::map<void *, std::vector<SomasNodePtr>> nodes_map_; 69 std::map<void *, vector<SomasParameterPtr>> parameters_map_; 70 71 // Vectors 72 std::vector<SomasNodePtr> nodes_list_; 73 std::vector<SomasStreamPtr> streams_list_; 74 std::vector<SomasTensorPtr> tensors_list_; 75 std::vector<SomasParameterPtr> parameters_list_; 76 77 // Stream groups 78 std::vector<vector<uint32_t>> streams_groups_; 79 80 // Solver 81 TensorsDescMap solver_tensor_desc_map_; 82 SomasSolverPrePtr somas_solver_; 83 84 // Contiguous list 85 std::vector<vector<size_t>> contiguous_tensors_list_; 86 87 // Ref lists 88 std::vector<vector<size_t>> ref_node_constraints_; 89 std::vector<vector<size_t>> ref_overlap_constraints_; 90 91 // total Offset 92 size_t mem_offset_{0}; 93 94 // Memory base addr 95 uint8_t *mem_base_addr_{nullptr}; 96 97 // Save debug info 98 bool save_graphs_{false}; 99 std::string save_graphs_path_; 100 101 // statistic info 102 size_t upper_bound_{0}; 103 size_t lower_bound_{0}; 104 size_t workspace_total_size_{0}; 105 size_t comm_input_total_size_{0}; 106 size_t comm_output_total_size_{0}; 107 size_t lifelong_all_total_size_{0}; 108 size_t lifelong_start_total_size_{0}; 109 size_t lifelong_end_total_size_{0}; 110 111 bool InitSomasTensors(const session::KernelGraph *graph); 112 void InitBasicInfo(const session::KernelGraph *graph); 113 void InitSomasStreamAndNode(const session::KernelGraph *graph); 114 void InitSomasOutputAndWorkspaceTensors(const session::KernelGraph *graph); 115 void InitSomasInputTensors(const session::KernelGraph *graph); 116 void GetNextOutputProcess(const session::KernelGraph *graph); 117 void IndependentNodeOutputProcess(const session::KernelGraph *graph); 118 #ifndef ENABLE_SECURITY 119 void SummaryInputProcess(const session::KernelGraph *graph); 120 #endif 121 void RefNodeProcess(const session::KernelGraph *graph); 122 void NonTaskSplitProcess(const session::KernelGraph *graph); 123 void UnReuseNodeProcess(const session::KernelGraph *graph); 124 SomasTensorPtr CreateGapTensor(size_t gap_tensor_id); 125 void GenContiguousList(const session::KernelGraph *graph); 126 127 void ComputeConflictPairs(); 128 129 bool Assign(const session::KernelGraph *graph); 130 131 std::string Offline() const; 132 void DumpOfflineIR(const string filename) const; 133 std::string GetSplitName(const string &scope_name) const; 134 size_t CalcLowerBound() const; 135 void GenGraphStatisticInfo(); 136 SomasParameterPtr GetSomasParameter(const AnfNodePtr &node, size_t index); 137 SomasParameterPtr CreateSomasParameter(const AnfNodePtr &node, size_t index); 138 void InitCommonNodeInputs(bool is_all_nop_node, const CNodePtr &kernel); 139 void InitAtomicCleanInputs(bool is_all_nop_node, const CNodePtr &kernel); 140 void ComputeOneTensorConflicts(const std::shared_ptr<SomasTensor> &calc_tensor, 141 const std::vector<SomasTensorPtr> &all_tensors_list, 142 const vector<DynamicBitSet> &nodes_dependency, 143 std::vector<DynamicBitSet> *tensor_relation) const; 144 void ComputeMultiTensorConflicts(const std::vector<SomasTensorPtr> &calc_tensors_list, 145 const std::vector<SomasTensorPtr> &all_tensors_list, 146 const vector<DynamicBitSet> &nodes_dependency, 147 std::vector<DynamicBitSet> *tensor_relation) const; 148 void UpdateTensorDestinations(); 149 void UpdateRefTensorsConflict(); 150 void UpdateRefOverlapTensorsConflicts(); 151 void UpdateRefTensorsOffset(); 152 void UpdateContiguousTensorsOffset(const std::map<size_t, size_t> &contiguous_ref_list_map); 153 void DumpParameters(std::ostringstream &oss) const; 154 void DumpTensors(std::ostringstream &oss) const; 155 void DumpNodes(std::ostringstream &oss) const; 156 std::map<size_t, size_t> GetContiguousListContainRefTensor(); 157 std::map<size_t, size_t> GetRefTensorsInContiguousList(); 158 bool SaveSomasResult(const session::KernelGraph *graph); 159 bool VerifySomasResult(const session::KernelGraph *graph, const nlohmann::json &somas_json) const; 160 bool LoadSomasResult(const session::KernelGraph *graph, const string &filename); 161 bool UpdateTensorsOffset(const std::vector<nlohmann::json> &tensors_json); 162 bool CalcSomasModelHash(const session::KernelGraph *graph); 163 void UpdateInputTensor(SomasNodePtr node, SomasNodePtr pre_somas_node, SomasTensorPtr input_somas_tensor) const; 164 bool LoadSomasCache(const session::KernelGraph *graph); 165 }; 166 167 using SomasPtr = std::shared_ptr<Somas>; 168 } // namespace somas 169 } // namespace mindspore 170 #endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_ 171