1 /** 2 * Copyright 2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_BUCKET_H_ 18 #define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_BUCKET_H_ 19 20 #include <vector> 21 #include <utility> 22 #include <string> 23 #include <memory> 24 #include "ir/anf.h" 25 #include "ir/device_event.h" 26 #include "runtime/device/launch_kernel.h" 27 #include "runtime/device/device_address.h" 28 #include "backend/session/kernel_graph.h" 29 30 namespace mindspore::device { 31 class Bucket { 32 public: Bucket(uint32_t id,uint32_t bucket_size)33 Bucket(uint32_t id, uint32_t bucket_size) 34 : id_(id), 35 bucket_size_(bucket_size), 36 full_(false), 37 stream_(nullptr), 38 compute_stream_(nullptr), 39 pre_event_(nullptr), 40 post_event_(nullptr), 41 launch_mul_(nullptr), 42 launch_atomic_clean_(nullptr), 43 total_size_(0), 44 ar_input_addr_(nullptr), 45 ar_output_addr_(nullptr) {} 46 virtual ~Bucket() = default; 47 id()48 uint32_t id() const { return id_; } full()49 bool full() const { return full_; } 50 void Launch(); 51 void Release(); 52 void AddGradTensor(const tensor::TensorPtr &tensor); 53 virtual void Init(const std::vector<void *> &compute_streams, const std::vector<void *> &communication_streams) = 0; 54 55 protected: 56 uint32_t id_; 57 uint32_t bucket_size_; 58 bool full_; 59 void *stream_; 60 void *compute_stream_; 61 62 std::shared_ptr<DeviceEvent> pre_event_; 63 std::shared_ptr<DeviceEvent> post_event_; 64 std::shared_ptr<LaunchKernel> launch_mul_; 65 std::shared_ptr<LaunchKernel> launch_atomic_clean_; 66 67 size_t total_size_; 68 uint8_t *ar_input_addr_; 69 uint8_t *ar_output_addr_; 70 std::string group_; 71 std::vector<size_t> align_size_list_; 72 std::vector<tensor::TensorPtr> grad_tensor_list_; 73 std::vector<uint8_t *> new_tensor_output_addrs_; 74 std::vector<kernel::AddressPtr> memcpy_input_addrs_; 75 std::vector<kernel::AddressPtr> memcpy_output_addrs_; 76 std::vector<TypeId> tensor_type_list_; 77 std::vector<void *> tensor_old_addr_list_; 78 79 virtual void AllocateAllReduceAddr() = 0; 80 void UpdateTensorAddr(); 81 void CalculateMean(); 82 virtual std::shared_ptr<LaunchKernel> CreateLaunchMul() = 0; 83 virtual void LaunchAllReduce() = 0; 84 virtual void FreeAllDeviceMem() = 0; 85 virtual void FreeDeviceMem(void *dev_ptr) = 0; 86 virtual void CopyTensorToContiguousMemory() = 0; 87 void UpdateTensorOutputAddr(uint8_t *addr); 88 void LazyDeleteOldAddr(); 89 }; 90 } // namespace mindspore::device 91 92 #endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_BUCKET_H_ 93