• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_
18 #define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_
19 
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <unordered_map>
24 #include <unordered_set>
25 #include <utility>
26 #include <vector>
27 
28 #include "backend/kernel_compiler/tbe/tbe_utils.h"
29 #include "backend/optimizer/somas/somas_node.h"
30 #include "backend/optimizer/somas/somas_solver_pre.h"
31 #include "backend/optimizer/somas/somas_stream.h"
32 #include "backend/optimizer/somas/somas_parameter.h"
33 #include "backend/session/anf_runtime_algorithm.h"
34 #include "backend/session/kernel_graph.h"
35 
36 namespace mindspore {
37 namespace somas {
38 class Somas {
39  public:
40   // Constructors/Destructors
41   Somas() = default;
42   Somas(const Somas &) = delete;
43   Somas &operator=(const Somas &) = delete;
44   ~Somas() = default;
45 
46   bool Allocate(const session::KernelGraph *graph);
GetTotalMemSize()47   size_t GetTotalMemSize() { return mem_offset_; }
set_mem_base_addr(uint8_t * mem_base_addr)48   void set_mem_base_addr(uint8_t *mem_base_addr) { mem_base_addr_ = mem_base_addr; }
49   uint8_t *GetNodeOutputPtr(const AnfNodePtr &node, size_t index) const;
50   uint8_t *GetNodeWorkSpacePtr(const AnfNodePtr &node, size_t index) const;
51 
52   std::string SomasInfo(bool calc_hash = false) const;
53   std::string SomasMemory() const;
54   void DumpSomasInfoIR(const string filename) const;
55   void DumpSomasMemoryIR(const string &filename) const;
56 
57   static bool NodeSort(const SomasNodePtr &node1, const SomasNodePtr &node2);
58 #ifndef ENABLE_SECURITY
59   void ConvertToProfilingNode(uint32_t graph_id);
60 #endif
61 
62  private:
63   std::vector<DynamicBitSet> reuse_matrix_;
64   // hash id
65   std::string hash_id_;
66   // Maps
67   std::unordered_map<size_t, SomasTensorPtr> tensors_map_;
68   std::map<void *, std::vector<SomasNodePtr>> nodes_map_;
69   std::map<void *, vector<SomasParameterPtr>> parameters_map_;
70 
71   // Vectors
72   std::vector<SomasNodePtr> nodes_list_;
73   std::vector<SomasStreamPtr> streams_list_;
74   std::vector<SomasTensorPtr> tensors_list_;
75   std::vector<SomasParameterPtr> parameters_list_;
76 
77   // Stream groups
78   std::vector<vector<uint32_t>> streams_groups_;
79 
80   // Solver
81   TensorsDescMap solver_tensor_desc_map_;
82   SomasSolverPrePtr somas_solver_;
83 
84   // Contiguous list
85   std::vector<vector<size_t>> contiguous_tensors_list_;
86 
87   // Ref lists
88   std::vector<vector<size_t>> ref_node_constraints_;
89   std::vector<vector<size_t>> ref_overlap_constraints_;
90 
91   // total Offset
92   size_t mem_offset_{0};
93 
94   // Memory base addr
95   uint8_t *mem_base_addr_{nullptr};
96 
97   // Save debug info
98   bool save_graphs_{false};
99   std::string save_graphs_path_;
100 
101   // statistic info
102   size_t upper_bound_{0};
103   size_t lower_bound_{0};
104   size_t workspace_total_size_{0};
105   size_t comm_input_total_size_{0};
106   size_t comm_output_total_size_{0};
107   size_t lifelong_all_total_size_{0};
108   size_t lifelong_start_total_size_{0};
109   size_t lifelong_end_total_size_{0};
110 
111   bool InitSomasTensors(const session::KernelGraph *graph);
112   void InitBasicInfo(const session::KernelGraph *graph);
113   void InitSomasStreamAndNode(const session::KernelGraph *graph);
114   void InitSomasOutputAndWorkspaceTensors(const session::KernelGraph *graph);
115   void InitSomasInputTensors(const session::KernelGraph *graph);
116   void GetNextOutputProcess(const session::KernelGraph *graph);
117   void IndependentNodeOutputProcess(const session::KernelGraph *graph);
118 #ifndef ENABLE_SECURITY
119   void SummaryInputProcess(const session::KernelGraph *graph);
120 #endif
121   void RefNodeProcess(const session::KernelGraph *graph);
122   void NonTaskSplitProcess(const session::KernelGraph *graph);
123   void UnReuseNodeProcess(const session::KernelGraph *graph);
124   SomasTensorPtr CreateGapTensor(size_t gap_tensor_id);
125   void GenContiguousList(const session::KernelGraph *graph);
126 
127   void ComputeConflictPairs();
128 
129   bool Assign(const session::KernelGraph *graph);
130 
131   std::string Offline() const;
132   void DumpOfflineIR(const string filename) const;
133   std::string GetSplitName(const string &scope_name) const;
134   size_t CalcLowerBound() const;
135   void GenGraphStatisticInfo();
136   SomasParameterPtr GetSomasParameter(const AnfNodePtr &node, size_t index);
137   SomasParameterPtr CreateSomasParameter(const AnfNodePtr &node, size_t index);
138   void InitCommonNodeInputs(bool is_all_nop_node, const CNodePtr &kernel);
139   void InitAtomicCleanInputs(bool is_all_nop_node, const CNodePtr &kernel);
140   void ComputeOneTensorConflicts(const std::shared_ptr<SomasTensor> &calc_tensor,
141                                  const std::vector<SomasTensorPtr> &all_tensors_list,
142                                  const vector<DynamicBitSet> &nodes_dependency,
143                                  std::vector<DynamicBitSet> *tensor_relation) const;
144   void ComputeMultiTensorConflicts(const std::vector<SomasTensorPtr> &calc_tensors_list,
145                                    const std::vector<SomasTensorPtr> &all_tensors_list,
146                                    const vector<DynamicBitSet> &nodes_dependency,
147                                    std::vector<DynamicBitSet> *tensor_relation) const;
148   void UpdateTensorDestinations();
149   void UpdateRefTensorsConflict();
150   void UpdateRefOverlapTensorsConflicts();
151   void UpdateRefTensorsOffset();
152   void UpdateContiguousTensorsOffset(const std::map<size_t, size_t> &contiguous_ref_list_map);
153   void DumpParameters(std::ostringstream &oss) const;
154   void DumpTensors(std::ostringstream &oss) const;
155   void DumpNodes(std::ostringstream &oss) const;
156   std::map<size_t, size_t> GetContiguousListContainRefTensor();
157   std::map<size_t, size_t> GetRefTensorsInContiguousList();
158   bool SaveSomasResult(const session::KernelGraph *graph);
159   bool VerifySomasResult(const session::KernelGraph *graph, const nlohmann::json &somas_json) const;
160   bool LoadSomasResult(const session::KernelGraph *graph, const string &filename);
161   bool UpdateTensorsOffset(const std::vector<nlohmann::json> &tensors_json);
162   bool CalcSomasModelHash(const session::KernelGraph *graph);
163   void UpdateInputTensor(SomasNodePtr node, SomasNodePtr pre_somas_node, SomasTensorPtr input_somas_tensor) const;
164   bool LoadSomasCache(const session::KernelGraph *graph);
165 };
166 
167 using SomasPtr = std::shared_ptr<Somas>;
168 }  // namespace somas
169 }  // namespace mindspore
170 #endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_SOMAS_SOMAS_H_
171