1 /** 2 * Copyright 2020-2023 Huawei Technologies Co., Ltd 3 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 8 * http://www.apache.org/licenses/LICENSE-2.0 9 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_BACKEND_COMMON_SOMAS_SOMAS_SOLVER_PRE_H_ 18 #define MINDSPORE_CCSRC_BACKEND_COMMON_SOMAS_SOMAS_SOLVER_PRE_H_ 19 20 #include <algorithm> 21 #include <cstddef> 22 #include <cstring> 23 #include <iostream> 24 #include <map> 25 #include <memory> 26 #include <stack> 27 #include <vector> 28 #include <climits> 29 #include "utils/hash_map.h" 30 #include "include/backend/kernel_graph.h" 31 32 using mindspore::HashMap; 33 using std::vector; 34 35 namespace mindspore { 36 namespace somas { 37 constexpr char const *sortingNames[7] = {"size(>), index(<)", 38 "can_reuse_mem(<), size(>), index(>)", 39 "size(>), index(>)", 40 "size(>), constraints(<), index(<)", 41 "size(>), constraints(<), index(>)", 42 "size(>), constraints(>), index(<)", 43 "size(>), constraints(>), index(>)"}; 44 constexpr char const *branchingNames[4] = {"bestfit", "smallest", "largest", "worstfit"}; 45 constexpr char const *algorithmTypeNames[2] = {"Shared Objects", "Single Object"}; 46 constexpr auto kParallelComputeSizeThreshold = 2000; 47 constexpr auto kHalfByteSize = 4; 48 enum Status { FAILED, SUCCESS }; 49 enum AlgorithmType { kManyObjects = 0, kSingleObject, kNumAlgorithmTypes }; 50 enum SortingType { 51 kGreaterSizeSmallerIndex = 0, 52 kSmallerReusePeakMemGreaterSizeSmallerIndex, 53 #ifdef SOMAS_DEBUG 54 kGreaterSizeGreaterIndex, 55 kGreaterSizeSmallerConstraintsSmallerIndex, 56 kGreaterSizeSmallerConstraintsGreaterIndex, 57 kGreaterSizeGreaterConstraintsSmallerIndex, 58 kGreaterSizeGreaterConstraintsGreaterIndex, 59 #endif 60 kNumSortingTypes 61 }; 62 enum FittingType { 63 kBest = 0, 64 kSmallest, 65 #ifdef SOMAS_DEBUG 66 kLargest, 67 kWorst, 68 #endif 69 kNumFittingTypes 70 }; 71 72 struct BestInfo { 73 size_t best_sol, worst, best, best_timing; 74 AlgorithmType best_algo; BestInfoBestInfo75 BestInfo() : best_sol(0), worst(0), best(SIZE_MAX), best_timing(SIZE_MAX), best_algo(kManyObjects) {} 76 }; 77 78 class DynamicBitSet { GetIndex(size_t index)79 inline size_t GetIndex(size_t index) const { return index / bit_width_; } 80 GetBitMask(size_t index)81 inline uint64_t GetBitMask(size_t index) const { 82 return ((static_cast<uint64_t>(0x1)) << ((bit_width_ - 1) - (index % bit_width_))); 83 } 84 Reset(uint64_t val)85 inline void Reset(uint64_t val) { 86 bit_.clear(); 87 bit_.assign(bit_size_, val); 88 } 89 90 public: 91 static constexpr size_t bit_width_ = 64; 92 size_t bit_size_; 93 std::vector<uint64_t> bit_; DynamicBitSet(size_t count)94 explicit DynamicBitSet(size_t count) : bit_size_((count + bit_width_ - 1) / bit_width_) { Reset(0x0); } 95 96 ~DynamicBitSet() = default; 97 98 DynamicBitSet(const DynamicBitSet &) = default; 99 DynamicBitSet &operator=(const DynamicBitSet &) = default; 100 101 void SetBitTrue(size_t index, bool log = false) { 102 if (log) { 103 MS_LOG(INFO) << GetIndex(index) << " " << GetBitMask(index); 104 } 105 bit_[GetIndex(index)] |= GetBitMask(index); 106 } 107 SetBitFalse(size_t index)108 void SetBitFalse(size_t index) { bit_[GetIndex(index)] &= (~GetBitMask(index)); } 109 IsBitTrue(size_t index)110 bool IsBitTrue(size_t index) const { return (bit_[GetIndex(index)] & GetBitMask(index)) != 0x0; } 111 CountOnesNum()112 size_t CountOnesNum() const { 113 size_t ret = 0; 114 static unsigned char ones_num_in_hex[] = "\0\1\1\2\1\2\2\3\1\2\2\3\2\3\3\4"; 115 for (size_t i = 0; i < bit_size_; i++) { 116 auto value = bit_[i]; 117 if (value == 0) { 118 continue; 119 } 120 auto *char_value = reinterpret_cast<unsigned char *>(&value); 121 for (size_t j = 0; j < bit_width_ / CHAR_BIT; j++) { 122 ret += ones_num_in_hex[static_cast<int>(char_value[j] & 0xF)]; 123 char_value[j] >>= kHalfByteSize; 124 ret += ones_num_in_hex[static_cast<int>(char_value[j] & 0xF)]; 125 } 126 } 127 return ret; 128 } 129 Log()130 void Log() { 131 std::cout << "Start Print Bitset "; 132 for (size_t i = 0; i < bit_size_; i++) { 133 std::cout << " bit [" << std::dec << i << "] = " << std::hex << bit_[i] << std::dec; 134 } 135 std::cout << std::endl; 136 } 137 Union(DynamicBitSet * a,DynamicBitSet * b)138 friend void Union(DynamicBitSet *a, DynamicBitSet *b) { 139 for (size_t i = 0; i < (*a).bit_size_; i++) { 140 (*a).bit_[i] |= (*b).bit_[i]; 141 } 142 } 143 And(DynamicBitSet * a,DynamicBitSet * b)144 friend void And(DynamicBitSet *a, DynamicBitSet *b) { 145 for (size_t i = 0; i < (*a).bit_size_; i++) { 146 (*a).bit_[i] &= (*b).bit_[i]; 147 } 148 } 149 }; 150 151 class VectorBitSet { 152 public: VectorBitSet(size_t count)153 explicit VectorBitSet(size_t count) : bit_(count, false), bit_size_(count) {} 154 ~VectorBitSet() = default; 155 156 VectorBitSet(const VectorBitSet &) = default; 157 VectorBitSet &operator=(const VectorBitSet &) = default; 158 SetBitTrue(size_t index)159 void SetBitTrue(size_t index) { bit_[index] = true; } 160 SetBitFalse(size_t index)161 void SetBitFalse(size_t index) { bit_[index] = false; } 162 SetBit(size_t index,bool value)163 void SetBit(size_t index, bool value) { bit_[index] = value; } 164 IsBitTrue(size_t index)165 bool IsBitTrue(size_t index) const { return bit_[index]; } 166 167 std::vector<bool> bit_; 168 size_t bit_size_; 169 }; 170 171 struct SomasSolverTensorDesc { 172 size_t index_; 173 size_t size_; 174 size_t offset_; 175 bool lifelong_; 176 size_t can_reuse_peak_mem_; 177 bool is_graph_output_; 178 using SomasSolverTensorDescPtr = std::shared_ptr<SomasSolverTensorDesc>; 179 SomasSolverTensorDescPtr right_; 180 SomasSolverTensorDescPtr left_; 181 bool blocked_; 182 183 SomasSolverTensorDesc() = default; 184 SomasSolverTensorDescSomasSolverTensorDesc185 SomasSolverTensorDesc(size_t index, size_t size, size_t offset, bool blifelong) 186 : index_(index), 187 size_(size), 188 offset_(offset), 189 lifelong_(blifelong), 190 can_reuse_peak_mem_(0), 191 is_graph_output_(false), 192 right_(nullptr), 193 left_(nullptr), 194 blocked_(false) {} 195 UpdateSomasSolverTensorDesc196 void Update(size_t index, size_t size, size_t offset, size_t can_reuse_peak_mem, bool is_graph_output, 197 bool blifelong) { 198 index_ = index; 199 size_ = size; 200 offset_ = offset; 201 lifelong_ = blifelong; 202 can_reuse_peak_mem_ = can_reuse_peak_mem; 203 is_graph_output_ = is_graph_output; 204 } 205 206 friend std::ostream &operator<<(std::ostream &out, const SomasSolverTensorDescPtr n) { 207 out << n->index_ << " " << n->size_ << " " << n->offset_ << "\n"; 208 return out; 209 } 210 friend std::istream &operator>>(std::istream &in, const SomasSolverTensorDescPtr &n) { 211 in >> n->index_ >> n->size_ >> n->offset_; 212 return in; 213 } 214 }; 215 using SomasSolverTensorDescPtr = std::shared_ptr<SomasSolverTensorDesc>; 216 typedef mindspore::HashMap<size_t, SomasSolverTensorDescPtr> TensorsDescMap; 217 class SomasSolverPre { 218 public: 219 SomasSolverPre() = default; 220 ~SomasSolverPre() = default; 221 222 SomasSolverPre(const SomasSolverPre &) = delete; 223 SomasSolverPre &operator=(const SomasSolverPre &) = delete; 224 GetMaxOffset()225 size_t GetMaxOffset() const { return max_offset_; } 226 227 Status Solving(const session::KernelGraph &graph, TensorsDescMap *ptensors, 228 const std::vector<VectorBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v, 229 const std::vector<int> &core_list, 230 bool bVerifySolution, // true -> Check continuous and non overlapping constraints solution 231 bool ball = true, // true -> run full set of heuristics, false -> run single heuristic specified 232 SortingType sorting = kGreaterSizeSmallerIndex, FittingType fitting = kBest, 233 AlgorithmType algorithm = kManyObjects); 234 235 void Log(const session::KernelGraph &graph, const TensorsDescMap &tensors, 236 const std::vector<VectorBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v) const; 237 238 Status CheckTensors(const TensorsDescMap *pTensors, uint32_t index1, uint32_t index2) const; 239 Status AddContiguousInfoInMap(const vector<vector<size_t>> &continuous_v, TensorsDescMap *pTensors) const; 240 Status AddContiguousInfoInMultiMaps(const vector<vector<size_t>> &continuous_v, vector<TensorsDescMap> *vecTensorsMap, 241 const TensorsDescMap *pTensors) const; 242 243 private: 244 size_t max_offset_; 245 void SolverInputLog(const session::KernelGraph &graph, const TensorsDescMap &tensors, 246 const vector<vector<size_t>> &continuous_v) const; 247 void SolverOutputLog(const session::KernelGraph &graph, const TensorsDescMap &tensors) const; 248 vector<TensorsDescMap> CreateTensorsMaps(const TensorsDescMap &tensors, size_t total_sol) const; 249 void TensorRelationLog(const std::vector<VectorBitSet> *pConstraints, const session::KernelGraph &graph) const; 250 }; 251 using SomasSolverPrePtr = std::shared_ptr<SomasSolverPre>; 252 } // namespace somas 253 } // namespace mindspore 254 255 #endif // MINDSPORE_CCSRC_BACKEND_COMMON_SOMAS_SOMAS_SOLVER_PRE_H_ 256