1 /** 2 * Copyright 2019-2023 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_DEVICE_ADDRESS_H_ 18 #define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_DEVICE_ADDRESS_H_ 19 20 #include <string> 21 #include <vector> 22 #include <memory> 23 #include "include/backend/device_address.h" 24 #include "runtime/device/loadable_device_address.h" 25 #include "runtime/device/kernel_runtime.h" 26 #include "plugin/device/ascend/hal/device/ascend_memory_pool.h" 27 #include "plugin/device/ascend/hal/device/launch_transdata.h" 28 #include "ir/dtype.h" 29 #include "kernel/kernel.h" 30 #include "utils/shape_utils.h" 31 #include "acl/acl_rt.h" 32 33 namespace mindspore { 34 #ifdef ENABLE_DEBUGGER 35 class Debugger; 36 #endif 37 namespace device { 38 class LaunchKernel; 39 namespace ascend { 40 class AscendDeviceAddress : public LoadableDeviceAddress { 41 public: AscendDeviceAddress(const KernelTensorPtr & kernel_tensor)42 explicit AscendDeviceAddress(const KernelTensorPtr &kernel_tensor) : LoadableDeviceAddress(kernel_tensor) { 43 SetDevicePtrDeleter(); 44 } AscendDeviceAddress(void * ptr,size_t size)45 explicit AscendDeviceAddress(void *ptr, size_t size) : LoadableDeviceAddress(ptr, size) { SetDevicePtrDeleter(); } AscendDeviceAddress(void * ptr,size_t size,const std::string & device_name,uint32_t device_id)46 explicit AscendDeviceAddress(void *ptr, size_t size, const std::string &device_name, uint32_t device_id) 47 : LoadableDeviceAddress(ptr, size, device_name, device_id) { 48 SetDevicePtrDeleter(); 49 } AscendDeviceAddress(void * ptr,size_t size,const std::string & format,TypeId type_id,const std::string & device_name,uint32_t device_id)50 explicit AscendDeviceAddress(void *ptr, size_t size, const std::string &format, TypeId type_id, 51 const std::string &device_name, uint32_t device_id) 52 : LoadableDeviceAddress(ptr, size, format, type_id, device_name, device_id) { 53 SetDevicePtrDeleter(); 54 } AscendDeviceAddress(void * ptr,size_t size,const ShapeVector & shape_vector,const Format & format,TypeId type_id,const std::string & device_name,uint32_t device_id,uint32_t stream_id)55 AscendDeviceAddress(void *ptr, size_t size, const ShapeVector &shape_vector, const Format &format, TypeId type_id, 56 const std::string &device_name, uint32_t device_id, uint32_t stream_id) 57 : LoadableDeviceAddress(ptr, size, shape_vector, format, type_id, device_name, device_id, stream_id) { 58 SetDevicePtrDeleter(); 59 } AscendDeviceAddress(void * ptr,size_t size,const std::string & format,TypeId type_id,const KernelWithIndex & node_index,const std::string & device_name,uint32_t device_id)60 explicit AscendDeviceAddress(void *ptr, size_t size, const std::string &format, TypeId type_id, 61 const KernelWithIndex &node_index, const std::string &device_name, uint32_t device_id) 62 : LoadableDeviceAddress(ptr, size, format, type_id, node_index, device_name, device_id) { 63 SetDevicePtrDeleter(); 64 } AscendDeviceAddress(void * ptr,size_t size,const std::string & format,TypeId type_id)65 explicit AscendDeviceAddress(void *ptr, size_t size, const std::string &format, TypeId type_id) 66 : LoadableDeviceAddress(ptr, size, format, type_id) { 67 SetDevicePtrDeleter(); 68 } 69 ~AscendDeviceAddress() override; 70 void DeviceSynchronizerInit() override; 71 bool SyncDeviceToHost(size_t size, void *const host_ptr) const override; 72 bool SyncHostToDevice(size_t size, const void *host_ptr) const override; 73 bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const override; 74 bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr, 75 const std::string &format) const override; 76 bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const std::string &format, 77 const tensor::TensorDataPtr &tensor_data) const override; 78 79 bool AsyncHostToDevice(size_t size, const void *host_ptr) const override; 80 81 bool AsyncDeviceToHost(size_t size, void *host_ptr) const override; 82 83 bool AsyncDeviceToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr, 84 const std::string &format) const override; 85 bool SyncDeviceToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr, 86 const std::string &format) const override; 87 bool AsyncHostToDevice(size_t size, TypeId /* type */, const void *host_ptr) const override; 88 bool SyncDeviceToDevice(const DeviceSync *src_device_addr) const override; 89 bool CopyDeviceToHost(void *dst, const void *src, const size_t &size) const override; 90 bool CopyHostToDevice(void *dst, const void *src, const size_t &size) const override; 91 void ClearDeviceMemory() override; GetDeviceType()92 DeviceType GetDeviceType() const override { return DeviceType::kAscend; } 93 #ifndef ENABLE_SECURITY 94 bool DumpMemToFile(const std::string &filepath, const std::string &host_fmt, const ShapeVector &host_shape, 95 TypeId host_type, bool trans_flag) const override; 96 #endif 97 #ifdef ENABLE_DEBUGGER 98 bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt, 99 const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev, 100 uint32_t root_graph_id, bool force_update, bool trans_flag, bool async_copy = true) const override; 101 #endif 102 103 // Asynchronously copy host memory to device side. 104 bool AsyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr, 105 size_t stream_id) const; 106 107 // Asynchronously copy device memory to host side. 108 bool AsyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr, size_t stream_id) const; 109 set_communication_ptr(uint8_t * communication_ptr)110 void set_communication_ptr(uint8_t *communication_ptr) override { 111 communication_ptr_ = communication_ptr; 112 // The communication_ptr_ should free to memory pool instead of GetDevicePtr(), so must update device pointer 113 // deleter. 114 SetDevicePtrDeleter(); 115 } 116 117 protected: 118 bool CopyDeviceToHost(void *dst, const void *src, size_t size, bool async, size_t stream_id) const override; 119 bool CopyHostToDevice(void *dst, const void *src, size_t size, bool async, size_t stream_id) const override; 120 121 bool DeviceToFileDirectly(void *ptr, size_t size, const std::string &file_name, size_t stream_id) const override; 122 123 bool FileToDeviceDirectly(void *ptr, size_t size, const std::string &file_name, size_t stream_id) const override; 124 125 void DeviceToDevice(void *dst, void *src, size_t size, size_t stream_id) const; 126 127 private: 128 bool SyncDeviceToHostAndConvertFormat(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const; 129 bool ConvertFormatAndSyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr, 130 const tensor::TensorDataPtr &tensor_data) const; 131 bool SyncDeviceToHostAndConvertFormatBasedOnTransData(const ShapeVector &host_shape, size_t size, 132 mindspore::TypeId type, void *host_ptr) const; 133 bool SyncDeviceToDeviceWithDiffFormatType(const DeviceSync *src_device_addr) const; 134 135 bool SyncHostToDeviceImpl(const ShapeVector &shape, size_t size, mindspore::TypeId type, const void *host_ptr, 136 const std::string &format, const tensor::TensorDataPtr &tensor_data = nullptr) const; 137 void SyncStream() const; 138 bool SyncStream(size_t stream_id) const; 139 bool Float64ToFloatAndSyncHostToDevice(void *dst, size_t dst_size, const void *src, size_t src_size, 140 const tensor::TensorDataPtr &tensor_data) const; 141 bool SyncDeviceToHostAndFloatToFloat64(void *dst, size_t dst_size, const void *src, size_t src_size) const; 142 void SyncMemory(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind, 143 const tensor::TensorDataPtr &tensor_data = nullptr) const; 144 void SyncHostMemoryToDeviceWithCopySrc(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind, 145 KernelRuntime *runtime_instance) const; 146 void SyncHostMemoryToDeviceForTensorFromNumpy(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind, 147 KernelRuntime *runtime_instance) const; 148 void SyncHostMemoryToDeviceWithTensorData(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind, 149 const tensor::TensorDataPtr &tensor_data, 150 KernelRuntime *runtime_instance) const; 151 ShapeVector GetDeviceShape(ShapeVector *host_shape) const; 152 std::shared_ptr<LaunchTransData> CreateLaunchTransData(const ShapeVector &host_shape, const std::string &ori_format, 153 const std::string &dst_format) const; 154 mutable std::shared_ptr<LaunchTransData> launch_transdata_{nullptr}; 155 void BindDevice() const; 156 void CopyHostToDevice(const void *src, uint64_t size, const tensor::TensorDataPtr &tensor_data) const; 157 void CopyDeviceToHost(void *dst, uint64_t size) const; 158 bool CopyBetweenHostDevice(void *dst, const void *src, size_t size, bool async, size_t stream_id, 159 bool host_to_device) const; 160 bool CopyBetweenFileDeviceDirectly(void *ptr, const std::string &file_name, size_t size, size_t stream_id, 161 bool file_to_device) const; 162 163 // The 'const' for this class is irrational, but I abide by it 164 int64_t GetGroupsWithCache() const; 165 166 // Set a device pointer destructor to kernel tensor, used to release resource reclaiming of the device pointer 167 // automatically when DeviceAddress destructed. 168 void SetDevicePtrDeleter(); 169 170 mutable int64_t groups_ = 1; 171 172 // When the device address is used by communication node, create protect area [kMemAlignSize -- data -- kMemAlignSize] 173 // memory buffer, communication_ptr_(allocated from ascend memory pool) + kMemAlignSize = device pointer (could get by 174 // GetDevicePtr()), device pointer is to really used by communication node, and communication_ptr_ is used to free 175 // memory to Ascend memory pool. 176 uint8_t *communication_ptr_{nullptr}; 177 }; 178 using AscendDeviceAddressPtr = std::shared_ptr<AscendDeviceAddress>; 179 } // namespace ascend 180 } // namespace device 181 } // namespace mindspore 182 #endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_DEVICE_ADDRESS_H_ 183